From 88e1d86c27cc5d9ca6062cd645d7c59a7cf9b73c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 5 May 2026 09:34:30 +0000 Subject: [PATCH 01/11] Release preparation for version 2.25.4 --- actions/ql/lib/CHANGELOG.md | 4 ++ .../ql/lib/change-notes/released/0.4.35.md | 3 ++ actions/ql/lib/codeql-pack.release.yml | 2 +- actions/ql/lib/qlpack.yml | 2 +- actions/ql/src/CHANGELOG.md | 6 ++- .../ql/src/change-notes/released/0.6.27.md | 3 ++ actions/ql/src/codeql-pack.release.yml | 2 +- actions/ql/src/qlpack.yml | 2 +- cpp/ql/lib/CHANGELOG.md | 11 ++++++ .../2026-03-20-data-extensions-barriers.md | 4 -- .../change-notes/2026-03-28-switch-stmt.md | 4 -- cpp/ql/lib/change-notes/2026-04-28-strsafe.md | 4 -- cpp/ql/lib/change-notes/released/10.1.0.md | 10 +++++ cpp/ql/lib/codeql-pack.release.yml | 2 +- cpp/ql/lib/qlpack.yml | 2 +- cpp/ql/src/CHANGELOG.md | 6 ++- cpp/ql/src/change-notes/released/1.6.2.md | 3 ++ cpp/ql/src/codeql-pack.release.yml | 2 +- cpp/ql/src/qlpack.yml | 2 +- .../ql/campaigns/Solorigate/lib/CHANGELOG.md | 4 ++ .../lib/change-notes/released/1.7.66.md | 3 ++ .../Solorigate/lib/codeql-pack.release.yml | 2 +- csharp/ql/campaigns/Solorigate/lib/qlpack.yml | 2 +- .../ql/campaigns/Solorigate/src/CHANGELOG.md | 4 ++ .../src/change-notes/released/1.7.66.md | 3 ++ .../Solorigate/src/codeql-pack.release.yml | 2 +- csharp/ql/campaigns/Solorigate/src/qlpack.yml | 2 +- csharp/ql/lib/CHANGELOG.md | 39 +++++++++++++++++++ ...26-03-06-compound-assignment-operations.md | 4 -- .../2026-03-20-data-extensions-barriers.md | 4 -- .../2026-04-01-asp-remote-sources.md | 4 -- .../2026-04-10-nuget-feed-usage-in-bmn.md | 4 -- csharp/ql/lib/change-notes/2026-04-13-cfg.md | 20 ---------- .../2026-05-01-ssa-replacement.md | 4 -- csharp/ql/lib/change-notes/released/6.0.0.md | 38 ++++++++++++++++++ csharp/ql/lib/codeql-pack.release.yml | 2 +- csharp/ql/lib/qlpack.yml | 2 +- csharp/ql/src/CHANGELOG.md | 4 ++ csharp/ql/src/change-notes/released/1.7.2.md | 3 ++ csharp/ql/src/codeql-pack.release.yml | 2 +- csharp/ql/src/qlpack.yml | 2 +- go/ql/consistency-queries/CHANGELOG.md | 4 ++ .../change-notes/released/1.0.49.md | 3 ++ .../codeql-pack.release.yml | 2 +- go/ql/consistency-queries/qlpack.yml | 2 +- go/ql/lib/CHANGELOG.md | 6 +++ .../7.1.0.md} | 7 ++-- go/ql/lib/codeql-pack.release.yml | 2 +- go/ql/lib/qlpack.yml | 2 +- go/ql/src/CHANGELOG.md | 4 ++ go/ql/src/change-notes/released/1.6.2.md | 3 ++ go/ql/src/codeql-pack.release.yml | 2 +- go/ql/src/qlpack.yml | 2 +- java/ql/lib/CHANGELOG.md | 15 +++++++ .../2026-03-20-data-extensions-barriers.md | 4 -- .../2026-04-04-path-injection-torealpath.md | 4 -- .../2026-04-04-sensitive-log-fp-reduction.md | 4 -- ...2026-04-04-sensitive-log-hash-sanitizer.md | 4 -- ...026-04-04-trust-boundary-regexp-barrier.md | 4 -- .../2026-04-18-partial-path-traversal-fix.md | 4 -- ...026-04-23-hibernate-queryproducer-sinks.md | 4 -- java/ql/lib/change-notes/released/9.1.0.md | 14 +++++++ java/ql/lib/codeql-pack.release.yml | 2 +- java/ql/lib/qlpack.yml | 2 +- java/ql/src/CHANGELOG.md | 4 ++ java/ql/src/change-notes/released/1.11.2.md | 3 ++ java/ql/src/codeql-pack.release.yml | 2 +- java/ql/src/qlpack.yml | 2 +- javascript/ql/lib/CHANGELOG.md | 7 ++++ .../2026-03-20-data-extensions-barriers.md | 4 -- .../2.7.0.md} | 8 ++-- javascript/ql/lib/codeql-pack.release.yml | 2 +- javascript/ql/lib/qlpack.yml | 2 +- javascript/ql/src/CHANGELOG.md | 4 ++ .../ql/src/change-notes/released/2.3.9.md | 3 ++ javascript/ql/src/codeql-pack.release.yml | 2 +- javascript/ql/src/qlpack.yml | 2 +- misc/suite-helpers/CHANGELOG.md | 4 ++ .../change-notes/released/1.0.49.md | 3 ++ misc/suite-helpers/codeql-pack.release.yml | 2 +- misc/suite-helpers/qlpack.yml | 2 +- python/ql/lib/CHANGELOG.md | 10 +++++ .../2026-03-20-data-extensions-barriers.md | 4 -- ...6-04-10-support-comprehension-unpacking.md | 5 --- python/ql/lib/change-notes/released/7.1.0.md | 9 +++++ python/ql/lib/codeql-pack.release.yml | 2 +- python/ql/lib/qlpack.yml | 2 +- python/ql/src/CHANGELOG.md | 4 ++ python/ql/src/change-notes/released/1.8.2.md | 3 ++ python/ql/src/codeql-pack.release.yml | 2 +- python/ql/src/qlpack.yml | 2 +- ruby/ql/lib/CHANGELOG.md | 6 +++ .../5.2.0.md} | 7 ++-- ruby/ql/lib/codeql-pack.release.yml | 2 +- ruby/ql/lib/qlpack.yml | 2 +- ruby/ql/src/CHANGELOG.md | 4 ++ ruby/ql/src/change-notes/released/1.6.2.md | 3 ++ ruby/ql/src/codeql-pack.release.yml | 2 +- ruby/ql/src/qlpack.yml | 2 +- rust/ql/lib/CHANGELOG.md | 6 +++ .../0.2.13.md} | 7 ++-- rust/ql/lib/codeql-pack.release.yml | 2 +- rust/ql/lib/qlpack.yml | 2 +- rust/ql/src/CHANGELOG.md | 4 ++ rust/ql/src/change-notes/released/0.1.34.md | 3 ++ rust/ql/src/codeql-pack.release.yml | 2 +- rust/ql/src/qlpack.yml | 2 +- shared/concepts/CHANGELOG.md | 4 ++ .../concepts/change-notes/released/0.0.23.md | 3 ++ shared/concepts/codeql-pack.release.yml | 2 +- shared/concepts/qlpack.yml | 2 +- shared/controlflow/CHANGELOG.md | 4 ++ .../change-notes/released/2.0.33.md | 3 ++ shared/controlflow/codeql-pack.release.yml | 2 +- shared/controlflow/qlpack.yml | 2 +- shared/dataflow/CHANGELOG.md | 4 ++ .../dataflow/change-notes/released/2.1.5.md | 3 ++ shared/dataflow/codeql-pack.release.yml | 2 +- shared/dataflow/qlpack.yml | 2 +- shared/mad/CHANGELOG.md | 4 ++ shared/mad/change-notes/released/1.0.49.md | 3 ++ shared/mad/codeql-pack.release.yml | 2 +- shared/mad/qlpack.yml | 2 +- shared/quantum/CHANGELOG.md | 4 ++ .../quantum/change-notes/released/0.0.27.md | 3 ++ shared/quantum/codeql-pack.release.yml | 2 +- shared/quantum/qlpack.yml | 2 +- shared/rangeanalysis/CHANGELOG.md | 4 ++ .../change-notes/released/1.0.49.md | 3 ++ shared/rangeanalysis/codeql-pack.release.yml | 2 +- shared/rangeanalysis/qlpack.yml | 2 +- shared/regex/CHANGELOG.md | 4 ++ shared/regex/change-notes/released/1.0.49.md | 3 ++ shared/regex/codeql-pack.release.yml | 2 +- shared/regex/qlpack.yml | 2 +- shared/ssa/CHANGELOG.md | 4 ++ shared/ssa/change-notes/released/2.0.25.md | 3 ++ shared/ssa/codeql-pack.release.yml | 2 +- shared/ssa/qlpack.yml | 2 +- shared/threat-models/CHANGELOG.md | 4 ++ .../change-notes/released/1.0.49.md | 3 ++ shared/threat-models/codeql-pack.release.yml | 2 +- shared/threat-models/qlpack.yml | 2 +- shared/tutorial/CHANGELOG.md | 4 ++ .../tutorial/change-notes/released/1.0.49.md | 3 ++ shared/tutorial/codeql-pack.release.yml | 2 +- shared/tutorial/qlpack.yml | 2 +- shared/typeflow/CHANGELOG.md | 4 ++ .../typeflow/change-notes/released/1.0.49.md | 3 ++ shared/typeflow/codeql-pack.release.yml | 2 +- shared/typeflow/qlpack.yml | 2 +- shared/typeinference/CHANGELOG.md | 4 ++ .../change-notes/released/0.0.30.md | 3 ++ shared/typeinference/codeql-pack.release.yml | 2 +- shared/typeinference/qlpack.yml | 2 +- shared/typetracking/CHANGELOG.md | 4 ++ .../change-notes/released/2.0.33.md | 3 ++ shared/typetracking/codeql-pack.release.yml | 2 +- shared/typetracking/qlpack.yml | 2 +- shared/typos/CHANGELOG.md | 4 ++ shared/typos/change-notes/released/1.0.49.md | 3 ++ shared/typos/codeql-pack.release.yml | 2 +- shared/typos/qlpack.yml | 2 +- shared/util/CHANGELOG.md | 4 ++ shared/util/change-notes/released/2.0.36.md | 3 ++ shared/util/codeql-pack.release.yml | 2 +- shared/util/qlpack.yml | 2 +- shared/xml/CHANGELOG.md | 4 ++ shared/xml/change-notes/released/1.0.49.md | 3 ++ shared/xml/codeql-pack.release.yml | 2 +- shared/xml/qlpack.yml | 2 +- shared/yaml/CHANGELOG.md | 4 ++ shared/yaml/change-notes/released/1.0.49.md | 3 ++ shared/yaml/codeql-pack.release.yml | 2 +- shared/yaml/qlpack.yml | 2 +- swift/ql/lib/CHANGELOG.md | 10 +++++ .../change-notes/2026-04-20-swift-6.3.1.md | 4 -- .../6.5.0.md} | 11 ++++-- swift/ql/lib/codeql-pack.release.yml | 2 +- swift/ql/lib/qlpack.yml | 2 +- swift/ql/src/CHANGELOG.md | 4 ++ swift/ql/src/change-notes/released/1.3.2.md | 3 ++ swift/ql/src/codeql-pack.release.yml | 2 +- swift/ql/src/qlpack.yml | 2 +- 184 files changed, 514 insertions(+), 196 deletions(-) create mode 100644 actions/ql/lib/change-notes/released/0.4.35.md create mode 100644 actions/ql/src/change-notes/released/0.6.27.md delete mode 100644 cpp/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md delete mode 100644 cpp/ql/lib/change-notes/2026-03-28-switch-stmt.md delete mode 100644 cpp/ql/lib/change-notes/2026-04-28-strsafe.md create mode 100644 cpp/ql/lib/change-notes/released/10.1.0.md create mode 100644 cpp/ql/src/change-notes/released/1.6.2.md create mode 100644 csharp/ql/campaigns/Solorigate/lib/change-notes/released/1.7.66.md create mode 100644 csharp/ql/campaigns/Solorigate/src/change-notes/released/1.7.66.md delete mode 100644 csharp/ql/lib/change-notes/2026-03-06-compound-assignment-operations.md delete mode 100644 csharp/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md delete mode 100644 csharp/ql/lib/change-notes/2026-04-01-asp-remote-sources.md delete mode 100644 csharp/ql/lib/change-notes/2026-04-10-nuget-feed-usage-in-bmn.md delete mode 100644 csharp/ql/lib/change-notes/2026-04-13-cfg.md delete mode 100644 csharp/ql/lib/change-notes/2026-05-01-ssa-replacement.md create mode 100644 csharp/ql/lib/change-notes/released/6.0.0.md create mode 100644 csharp/ql/src/change-notes/released/1.7.2.md create mode 100644 go/ql/consistency-queries/change-notes/released/1.0.49.md rename go/ql/lib/change-notes/{2026-03-20-data-extensions-barriers.md => released/7.1.0.md} (89%) create mode 100644 go/ql/src/change-notes/released/1.6.2.md delete mode 100644 java/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md delete mode 100644 java/ql/lib/change-notes/2026-04-04-path-injection-torealpath.md delete mode 100644 java/ql/lib/change-notes/2026-04-04-sensitive-log-fp-reduction.md delete mode 100644 java/ql/lib/change-notes/2026-04-04-sensitive-log-hash-sanitizer.md delete mode 100644 java/ql/lib/change-notes/2026-04-04-trust-boundary-regexp-barrier.md delete mode 100644 java/ql/lib/change-notes/2026-04-18-partial-path-traversal-fix.md delete mode 100644 java/ql/lib/change-notes/2026-04-23-hibernate-queryproducer-sinks.md create mode 100644 java/ql/lib/change-notes/released/9.1.0.md create mode 100644 java/ql/src/change-notes/released/1.11.2.md delete mode 100644 javascript/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md rename javascript/ql/lib/change-notes/{2026-04-12-vercel-node.md => released/2.7.0.md} (58%) create mode 100644 javascript/ql/src/change-notes/released/2.3.9.md create mode 100644 misc/suite-helpers/change-notes/released/1.0.49.md delete mode 100644 python/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md delete mode 100644 python/ql/lib/change-notes/2026-04-10-support-comprehension-unpacking.md create mode 100644 python/ql/lib/change-notes/released/7.1.0.md create mode 100644 python/ql/src/change-notes/released/1.8.2.md rename ruby/ql/lib/change-notes/{2026-03-20-data-extensions-barriers.md => released/5.2.0.md} (89%) create mode 100644 ruby/ql/src/change-notes/released/1.6.2.md rename rust/ql/lib/change-notes/{2026-03-20-data-extensions-barriers.md => released/0.2.13.md} (73%) create mode 100644 rust/ql/src/change-notes/released/0.1.34.md create mode 100644 shared/concepts/change-notes/released/0.0.23.md create mode 100644 shared/controlflow/change-notes/released/2.0.33.md create mode 100644 shared/dataflow/change-notes/released/2.1.5.md create mode 100644 shared/mad/change-notes/released/1.0.49.md create mode 100644 shared/quantum/change-notes/released/0.0.27.md create mode 100644 shared/rangeanalysis/change-notes/released/1.0.49.md create mode 100644 shared/regex/change-notes/released/1.0.49.md create mode 100644 shared/ssa/change-notes/released/2.0.25.md create mode 100644 shared/threat-models/change-notes/released/1.0.49.md create mode 100644 shared/tutorial/change-notes/released/1.0.49.md create mode 100644 shared/typeflow/change-notes/released/1.0.49.md create mode 100644 shared/typeinference/change-notes/released/0.0.30.md create mode 100644 shared/typetracking/change-notes/released/2.0.33.md create mode 100644 shared/typos/change-notes/released/1.0.49.md create mode 100644 shared/util/change-notes/released/2.0.36.md create mode 100644 shared/xml/change-notes/released/1.0.49.md create mode 100644 shared/yaml/change-notes/released/1.0.49.md delete mode 100644 swift/ql/lib/change-notes/2026-04-20-swift-6.3.1.md rename swift/ql/lib/change-notes/{2026-04-17-fixed-array.md => released/6.5.0.md} (63%) create mode 100644 swift/ql/src/change-notes/released/1.3.2.md diff --git a/actions/ql/lib/CHANGELOG.md b/actions/ql/lib/CHANGELOG.md index e84ba38d180..011af903a2a 100644 --- a/actions/ql/lib/CHANGELOG.md +++ b/actions/ql/lib/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.4.35 + +No user-facing changes. + ## 0.4.34 ### Minor Analysis Improvements diff --git a/actions/ql/lib/change-notes/released/0.4.35.md b/actions/ql/lib/change-notes/released/0.4.35.md new file mode 100644 index 00000000000..3274ffc88e4 --- /dev/null +++ b/actions/ql/lib/change-notes/released/0.4.35.md @@ -0,0 +1,3 @@ +## 0.4.35 + +No user-facing changes. diff --git a/actions/ql/lib/codeql-pack.release.yml b/actions/ql/lib/codeql-pack.release.yml index 69fb16e4c39..524302c92d3 100644 --- a/actions/ql/lib/codeql-pack.release.yml +++ b/actions/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 0.4.34 +lastReleaseVersion: 0.4.35 diff --git a/actions/ql/lib/qlpack.yml b/actions/ql/lib/qlpack.yml index 6e78fc546b3..a20f2e7a507 100644 --- a/actions/ql/lib/qlpack.yml +++ b/actions/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/actions-all -version: 0.4.35-dev +version: 0.4.35 library: true warnOnImplicitThis: true dependencies: diff --git a/actions/ql/src/CHANGELOG.md b/actions/ql/src/CHANGELOG.md index 96f8d266206..0a7fa1ce830 100644 --- a/actions/ql/src/CHANGELOG.md +++ b/actions/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.6.27 + +No user-facing changes. + ## 0.6.26 ### Major Analysis Improvements @@ -173,7 +177,7 @@ No user-facing changes. * `actions/if-expression-always-true/critical` * `actions/if-expression-always-true/high` * `actions/unnecessary-use-of-advanced-config` - + * The following query has been moved from the `code-scanning` suite to the `security-extended` suite. Any existing alerts for this query will be closed automatically unless the analysis is configured to use the `security-extended` suite. diff --git a/actions/ql/src/change-notes/released/0.6.27.md b/actions/ql/src/change-notes/released/0.6.27.md new file mode 100644 index 00000000000..52d3a10fd1f --- /dev/null +++ b/actions/ql/src/change-notes/released/0.6.27.md @@ -0,0 +1,3 @@ +## 0.6.27 + +No user-facing changes. diff --git a/actions/ql/src/codeql-pack.release.yml b/actions/ql/src/codeql-pack.release.yml index e83bac0046e..0748b12112f 100644 --- a/actions/ql/src/codeql-pack.release.yml +++ b/actions/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 0.6.26 +lastReleaseVersion: 0.6.27 diff --git a/actions/ql/src/qlpack.yml b/actions/ql/src/qlpack.yml index c815afc498c..bcc7fe06a3b 100644 --- a/actions/ql/src/qlpack.yml +++ b/actions/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/actions-queries -version: 0.6.27-dev +version: 0.6.27 library: false warnOnImplicitThis: true groups: [actions, queries] diff --git a/cpp/ql/lib/CHANGELOG.md b/cpp/ql/lib/CHANGELOG.md index 2cd1bcede35..828b8779117 100644 --- a/cpp/ql/lib/CHANGELOG.md +++ b/cpp/ql/lib/CHANGELOG.md @@ -1,3 +1,14 @@ +## 10.1.0 + +### New Features + +* A new predicate `getSwitchCase` was added to the `SwitchStmt` class, which yields the `n`th `case` statement from a `switch` statement. +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for C and C++](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-cpp/). + +### Minor Analysis Improvements + +* Added taint flow models for the `Strsafe.h` header from the Windows SDK. + ## 10.0.0 ### Breaking Changes diff --git a/cpp/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md b/cpp/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md deleted file mode 100644 index 30f0092a4e9..00000000000 --- a/cpp/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: feature ---- -* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for C and C++](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-cpp/). diff --git a/cpp/ql/lib/change-notes/2026-03-28-switch-stmt.md b/cpp/ql/lib/change-notes/2026-03-28-switch-stmt.md deleted file mode 100644 index 4b0d7528d47..00000000000 --- a/cpp/ql/lib/change-notes/2026-03-28-switch-stmt.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: feature ---- -* A new predicate `getSwitchCase` was added to the `SwitchStmt` class, which yields the `n`th `case` statement from a `switch` statement. diff --git a/cpp/ql/lib/change-notes/2026-04-28-strsafe.md b/cpp/ql/lib/change-notes/2026-04-28-strsafe.md deleted file mode 100644 index 9ef3fab0853..00000000000 --- a/cpp/ql/lib/change-notes/2026-04-28-strsafe.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* Added taint flow models for the `Strsafe.h` header from the Windows SDK. \ No newline at end of file diff --git a/cpp/ql/lib/change-notes/released/10.1.0.md b/cpp/ql/lib/change-notes/released/10.1.0.md new file mode 100644 index 00000000000..45d153b4896 --- /dev/null +++ b/cpp/ql/lib/change-notes/released/10.1.0.md @@ -0,0 +1,10 @@ +## 10.1.0 + +### New Features + +* A new predicate `getSwitchCase` was added to the `SwitchStmt` class, which yields the `n`th `case` statement from a `switch` statement. +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for C and C++](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-cpp/). + +### Minor Analysis Improvements + +* Added taint flow models for the `Strsafe.h` header from the Windows SDK. diff --git a/cpp/ql/lib/codeql-pack.release.yml b/cpp/ql/lib/codeql-pack.release.yml index 28758256b94..a85b2d36d7c 100644 --- a/cpp/ql/lib/codeql-pack.release.yml +++ b/cpp/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 10.0.0 +lastReleaseVersion: 10.1.0 diff --git a/cpp/ql/lib/qlpack.yml b/cpp/ql/lib/qlpack.yml index 8a9d60a7fa9..e3b9f7c3363 100644 --- a/cpp/ql/lib/qlpack.yml +++ b/cpp/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/cpp-all -version: 10.0.1-dev +version: 10.1.0 groups: cpp dbscheme: semmlecode.cpp.dbscheme extractor: cpp diff --git a/cpp/ql/src/CHANGELOG.md b/cpp/ql/src/CHANGELOG.md index e677f584416..3a9d4fae927 100644 --- a/cpp/ql/src/CHANGELOG.md +++ b/cpp/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.6.2 + +No user-facing changes. + ## 1.6.1 ### Minor Analysis Improvements @@ -366,7 +370,7 @@ No user-facing changes. ### Minor Analysis Improvements * The "non-constant format string" query (`cpp/non-constant-format`) has been updated to produce fewer false positives. -* Added dataflow models for the `gettext` function variants. +* Added dataflow models for the `gettext` function variants. ## 0.9.4 diff --git a/cpp/ql/src/change-notes/released/1.6.2.md b/cpp/ql/src/change-notes/released/1.6.2.md new file mode 100644 index 00000000000..bbe3747556f --- /dev/null +++ b/cpp/ql/src/change-notes/released/1.6.2.md @@ -0,0 +1,3 @@ +## 1.6.2 + +No user-facing changes. diff --git a/cpp/ql/src/codeql-pack.release.yml b/cpp/ql/src/codeql-pack.release.yml index ef7a789e0cf..5f5beb68311 100644 --- a/cpp/ql/src/codeql-pack.release.yml +++ b/cpp/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.6.1 +lastReleaseVersion: 1.6.2 diff --git a/cpp/ql/src/qlpack.yml b/cpp/ql/src/qlpack.yml index 714167434c8..83d7a32e6d4 100644 --- a/cpp/ql/src/qlpack.yml +++ b/cpp/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/cpp-queries -version: 1.6.2-dev +version: 1.6.2 groups: - cpp - queries diff --git a/csharp/ql/campaigns/Solorigate/lib/CHANGELOG.md b/csharp/ql/campaigns/Solorigate/lib/CHANGELOG.md index 166a94bd88d..4cb7b556968 100644 --- a/csharp/ql/campaigns/Solorigate/lib/CHANGELOG.md +++ b/csharp/ql/campaigns/Solorigate/lib/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.7.66 + +No user-facing changes. + ## 1.7.65 No user-facing changes. diff --git a/csharp/ql/campaigns/Solorigate/lib/change-notes/released/1.7.66.md b/csharp/ql/campaigns/Solorigate/lib/change-notes/released/1.7.66.md new file mode 100644 index 00000000000..7fc1a46a66e --- /dev/null +++ b/csharp/ql/campaigns/Solorigate/lib/change-notes/released/1.7.66.md @@ -0,0 +1,3 @@ +## 1.7.66 + +No user-facing changes. diff --git a/csharp/ql/campaigns/Solorigate/lib/codeql-pack.release.yml b/csharp/ql/campaigns/Solorigate/lib/codeql-pack.release.yml index bf581427d29..7d0a2c0bc07 100644 --- a/csharp/ql/campaigns/Solorigate/lib/codeql-pack.release.yml +++ b/csharp/ql/campaigns/Solorigate/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.7.65 +lastReleaseVersion: 1.7.66 diff --git a/csharp/ql/campaigns/Solorigate/lib/qlpack.yml b/csharp/ql/campaigns/Solorigate/lib/qlpack.yml index 9d0e0ffd4f9..88ba74212c7 100644 --- a/csharp/ql/campaigns/Solorigate/lib/qlpack.yml +++ b/csharp/ql/campaigns/Solorigate/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/csharp-solorigate-all -version: 1.7.66-dev +version: 1.7.66 groups: - csharp - solorigate diff --git a/csharp/ql/campaigns/Solorigate/src/CHANGELOG.md b/csharp/ql/campaigns/Solorigate/src/CHANGELOG.md index 166a94bd88d..4cb7b556968 100644 --- a/csharp/ql/campaigns/Solorigate/src/CHANGELOG.md +++ b/csharp/ql/campaigns/Solorigate/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.7.66 + +No user-facing changes. + ## 1.7.65 No user-facing changes. diff --git a/csharp/ql/campaigns/Solorigate/src/change-notes/released/1.7.66.md b/csharp/ql/campaigns/Solorigate/src/change-notes/released/1.7.66.md new file mode 100644 index 00000000000..7fc1a46a66e --- /dev/null +++ b/csharp/ql/campaigns/Solorigate/src/change-notes/released/1.7.66.md @@ -0,0 +1,3 @@ +## 1.7.66 + +No user-facing changes. diff --git a/csharp/ql/campaigns/Solorigate/src/codeql-pack.release.yml b/csharp/ql/campaigns/Solorigate/src/codeql-pack.release.yml index bf581427d29..7d0a2c0bc07 100644 --- a/csharp/ql/campaigns/Solorigate/src/codeql-pack.release.yml +++ b/csharp/ql/campaigns/Solorigate/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.7.65 +lastReleaseVersion: 1.7.66 diff --git a/csharp/ql/campaigns/Solorigate/src/qlpack.yml b/csharp/ql/campaigns/Solorigate/src/qlpack.yml index f5203f4e443..fee050486c9 100644 --- a/csharp/ql/campaigns/Solorigate/src/qlpack.yml +++ b/csharp/ql/campaigns/Solorigate/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/csharp-solorigate-queries -version: 1.7.66-dev +version: 1.7.66 groups: - csharp - solorigate diff --git a/csharp/ql/lib/CHANGELOG.md b/csharp/ql/lib/CHANGELOG.md index 2e3f6c137ee..1d7f42a9075 100644 --- a/csharp/ql/lib/CHANGELOG.md +++ b/csharp/ql/lib/CHANGELOG.md @@ -1,3 +1,42 @@ +## 6.0.0 + +### Breaking Changes + +* The C# control flow graph (CFG) implementation has been completely + rewritten. The CFG now includes additional nodes to more accurately represent + certain constructs. This also means that any existing code that implicitly + relies on very specific details about the CFG may need to be updated. + The CFG no longer uses splitting, which means that AST nodes now have a unique + CFG node representation. + Additionally, the following breaking changes have been made: + - `ControlFlow::Node` has been renamed to `ControlFlowNode`. + - `ControlFlow::Nodes` has been renamed to `ControlFlowNodes`. + - `BasicBlock.getCallable` has been renamed to `BasicBlock.getEnclosingCallable`. + - `BasicBlocks.qll` has been deleted. + - `ControlFlowNode.getAstNode` has changed its meaning. The AST-to-CFG + mapping remains one-to-many, but now for a different reason. It used to be + because of splitting, but now it's because of additional "helper" CFG + nodes. To get the (now canonical) CFG node for a given AST node, use + `ControlFlowNode.asExpr()` or `ControlFlowNode.asStmt()` or + `ControlFlowElement.getControlFlowNode()` instead. + +### Deprecated APIs + +* The QL classes in the C# SSA library have been renamed to improve consistency between languages. Any custom QL code that makes use of SSA needs to be updated. The old classes have been deprecated and include more detailed migration instructions in their qldoc. + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for C#](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-csharp/). + +### Major Analysis Improvements + +* When resolving dependencies in `build-mode: none`, `dotnet restore` now explicitly receives reachable NuGet feeds configured in `nuget.config` when feed responsiveness checking is enabled (the default), and any private registries directly, improving reliability when default feeds are unavailable or restricted. + +### Minor Analysis Improvements + +* Expanded ASP and ASP.NET remote source modeling to cover additional sources, including fields of tainted parameters as well as properties and fields that become tainted transitively. +* C# 14: Added support for user-defined compound assignment operators. + ## 5.5.0 ### Deprecated APIs diff --git a/csharp/ql/lib/change-notes/2026-03-06-compound-assignment-operations.md b/csharp/ql/lib/change-notes/2026-03-06-compound-assignment-operations.md deleted file mode 100644 index f7e68b9b7d7..00000000000 --- a/csharp/ql/lib/change-notes/2026-03-06-compound-assignment-operations.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* C# 14: Added support for user-defined compound assignment operators. diff --git a/csharp/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md b/csharp/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md deleted file mode 100644 index 6408acc7dae..00000000000 --- a/csharp/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: feature ---- -* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for C#](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-csharp/). diff --git a/csharp/ql/lib/change-notes/2026-04-01-asp-remote-sources.md b/csharp/ql/lib/change-notes/2026-04-01-asp-remote-sources.md deleted file mode 100644 index 52f3f721e9f..00000000000 --- a/csharp/ql/lib/change-notes/2026-04-01-asp-remote-sources.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* Expanded ASP and ASP.NET remote source modeling to cover additional sources, including fields of tainted parameters as well as properties and fields that become tainted transitively. diff --git a/csharp/ql/lib/change-notes/2026-04-10-nuget-feed-usage-in-bmn.md b/csharp/ql/lib/change-notes/2026-04-10-nuget-feed-usage-in-bmn.md deleted file mode 100644 index a4282d0468d..00000000000 --- a/csharp/ql/lib/change-notes/2026-04-10-nuget-feed-usage-in-bmn.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: majorAnalysis ---- -* When resolving dependencies in `build-mode: none`, `dotnet restore` now explicitly receives reachable NuGet feeds configured in `nuget.config` when feed responsiveness checking is enabled (the default), and any private registries directly, improving reliability when default feeds are unavailable or restricted. diff --git a/csharp/ql/lib/change-notes/2026-04-13-cfg.md b/csharp/ql/lib/change-notes/2026-04-13-cfg.md deleted file mode 100644 index 9c588fbcfa8..00000000000 --- a/csharp/ql/lib/change-notes/2026-04-13-cfg.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -category: breaking ---- -* The C# control flow graph (CFG) implementation has been completely - rewritten. The CFG now includes additional nodes to more accurately represent - certain constructs. This also means that any existing code that implicitly - relies on very specific details about the CFG may need to be updated. - The CFG no longer uses splitting, which means that AST nodes now have a unique - CFG node representation. - Additionally, the following breaking changes have been made: - - `ControlFlow::Node` has been renamed to `ControlFlowNode`. - - `ControlFlow::Nodes` has been renamed to `ControlFlowNodes`. - - `BasicBlock.getCallable` has been renamed to `BasicBlock.getEnclosingCallable`. - - `BasicBlocks.qll` has been deleted. - - `ControlFlowNode.getAstNode` has changed its meaning. The AST-to-CFG - mapping remains one-to-many, but now for a different reason. It used to be - because of splitting, but now it's because of additional "helper" CFG - nodes. To get the (now canonical) CFG node for a given AST node, use - `ControlFlowNode.asExpr()` or `ControlFlowNode.asStmt()` or - `ControlFlowElement.getControlFlowNode()` instead. diff --git a/csharp/ql/lib/change-notes/2026-05-01-ssa-replacement.md b/csharp/ql/lib/change-notes/2026-05-01-ssa-replacement.md deleted file mode 100644 index 27988f36f2f..00000000000 --- a/csharp/ql/lib/change-notes/2026-05-01-ssa-replacement.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: deprecated ---- -* The QL classes in the C# SSA library have been renamed to improve consistency between languages. Any custom QL code that makes use of SSA needs to be updated. The old classes have been deprecated and include more detailed migration instructions in their qldoc. diff --git a/csharp/ql/lib/change-notes/released/6.0.0.md b/csharp/ql/lib/change-notes/released/6.0.0.md new file mode 100644 index 00000000000..e249567d095 --- /dev/null +++ b/csharp/ql/lib/change-notes/released/6.0.0.md @@ -0,0 +1,38 @@ +## 6.0.0 + +### Breaking Changes + +* The C# control flow graph (CFG) implementation has been completely + rewritten. The CFG now includes additional nodes to more accurately represent + certain constructs. This also means that any existing code that implicitly + relies on very specific details about the CFG may need to be updated. + The CFG no longer uses splitting, which means that AST nodes now have a unique + CFG node representation. + Additionally, the following breaking changes have been made: + - `ControlFlow::Node` has been renamed to `ControlFlowNode`. + - `ControlFlow::Nodes` has been renamed to `ControlFlowNodes`. + - `BasicBlock.getCallable` has been renamed to `BasicBlock.getEnclosingCallable`. + - `BasicBlocks.qll` has been deleted. + - `ControlFlowNode.getAstNode` has changed its meaning. The AST-to-CFG + mapping remains one-to-many, but now for a different reason. It used to be + because of splitting, but now it's because of additional "helper" CFG + nodes. To get the (now canonical) CFG node for a given AST node, use + `ControlFlowNode.asExpr()` or `ControlFlowNode.asStmt()` or + `ControlFlowElement.getControlFlowNode()` instead. + +### Deprecated APIs + +* The QL classes in the C# SSA library have been renamed to improve consistency between languages. Any custom QL code that makes use of SSA needs to be updated. The old classes have been deprecated and include more detailed migration instructions in their qldoc. + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for C#](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-csharp/). + +### Major Analysis Improvements + +* When resolving dependencies in `build-mode: none`, `dotnet restore` now explicitly receives reachable NuGet feeds configured in `nuget.config` when feed responsiveness checking is enabled (the default), and any private registries directly, improving reliability when default feeds are unavailable or restricted. + +### Minor Analysis Improvements + +* Expanded ASP and ASP.NET remote source modeling to cover additional sources, including fields of tainted parameters as well as properties and fields that become tainted transitively. +* C# 14: Added support for user-defined compound assignment operators. diff --git a/csharp/ql/lib/codeql-pack.release.yml b/csharp/ql/lib/codeql-pack.release.yml index 4b8cf9533c1..f8c4fa43ccb 100644 --- a/csharp/ql/lib/codeql-pack.release.yml +++ b/csharp/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 5.5.0 +lastReleaseVersion: 6.0.0 diff --git a/csharp/ql/lib/qlpack.yml b/csharp/ql/lib/qlpack.yml index 26e332652cd..daded1ee71e 100644 --- a/csharp/ql/lib/qlpack.yml +++ b/csharp/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/csharp-all -version: 5.5.1-dev +version: 6.0.0 groups: csharp dbscheme: semmlecode.csharp.dbscheme extractor: csharp diff --git a/csharp/ql/src/CHANGELOG.md b/csharp/ql/src/CHANGELOG.md index cdab7134185..32243acfb97 100644 --- a/csharp/ql/src/CHANGELOG.md +++ b/csharp/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.7.2 + +No user-facing changes. + ## 1.7.1 ### Minor Analysis Improvements diff --git a/csharp/ql/src/change-notes/released/1.7.2.md b/csharp/ql/src/change-notes/released/1.7.2.md new file mode 100644 index 00000000000..b950385c16d --- /dev/null +++ b/csharp/ql/src/change-notes/released/1.7.2.md @@ -0,0 +1,3 @@ +## 1.7.2 + +No user-facing changes. diff --git a/csharp/ql/src/codeql-pack.release.yml b/csharp/ql/src/codeql-pack.release.yml index 7bdec0d85c7..39bbba86c19 100644 --- a/csharp/ql/src/codeql-pack.release.yml +++ b/csharp/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.7.1 +lastReleaseVersion: 1.7.2 diff --git a/csharp/ql/src/qlpack.yml b/csharp/ql/src/qlpack.yml index 25b04cf2dc6..72d951194c3 100644 --- a/csharp/ql/src/qlpack.yml +++ b/csharp/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/csharp-queries -version: 1.7.2-dev +version: 1.7.2 groups: - csharp - queries diff --git a/go/ql/consistency-queries/CHANGELOG.md b/go/ql/consistency-queries/CHANGELOG.md index a3aa00d4872..a6b6055373a 100644 --- a/go/ql/consistency-queries/CHANGELOG.md +++ b/go/ql/consistency-queries/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/go/ql/consistency-queries/change-notes/released/1.0.49.md b/go/ql/consistency-queries/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/go/ql/consistency-queries/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/go/ql/consistency-queries/codeql-pack.release.yml b/go/ql/consistency-queries/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/go/ql/consistency-queries/codeql-pack.release.yml +++ b/go/ql/consistency-queries/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/go/ql/consistency-queries/qlpack.yml b/go/ql/consistency-queries/qlpack.yml index a82ec95583b..73a837bd0e0 100644 --- a/go/ql/consistency-queries/qlpack.yml +++ b/go/ql/consistency-queries/qlpack.yml @@ -1,5 +1,5 @@ name: codeql-go-consistency-queries -version: 1.0.49-dev +version: 1.0.49 groups: - go - queries diff --git a/go/ql/lib/CHANGELOG.md b/go/ql/lib/CHANGELOG.md index 737d08654b8..5043d924be0 100644 --- a/go/ql/lib/CHANGELOG.md +++ b/go/ql/lib/CHANGELOG.md @@ -1,3 +1,9 @@ +## 7.1.0 + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Go](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-go/). + ## 7.0.6 No user-facing changes. diff --git a/go/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md b/go/ql/lib/change-notes/released/7.1.0.md similarity index 89% rename from go/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md rename to go/ql/lib/change-notes/released/7.1.0.md index ee1b51de861..b1f6efbf001 100644 --- a/go/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md +++ b/go/ql/lib/change-notes/released/7.1.0.md @@ -1,4 +1,5 @@ ---- -category: feature ---- +## 7.1.0 + +### New Features + * Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Go](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-go/). diff --git a/go/ql/lib/codeql-pack.release.yml b/go/ql/lib/codeql-pack.release.yml index c7cff8c5378..dcaaa76112a 100644 --- a/go/ql/lib/codeql-pack.release.yml +++ b/go/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 7.0.6 +lastReleaseVersion: 7.1.0 diff --git a/go/ql/lib/qlpack.yml b/go/ql/lib/qlpack.yml index e191e0da688..1d1682d6108 100644 --- a/go/ql/lib/qlpack.yml +++ b/go/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/go-all -version: 7.0.7-dev +version: 7.1.0 groups: go dbscheme: go.dbscheme extractor: go diff --git a/go/ql/src/CHANGELOG.md b/go/ql/src/CHANGELOG.md index 971d478d56e..ddda5dc4829 100644 --- a/go/ql/src/CHANGELOG.md +++ b/go/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.6.2 + +No user-facing changes. + ## 1.6.1 No user-facing changes. diff --git a/go/ql/src/change-notes/released/1.6.2.md b/go/ql/src/change-notes/released/1.6.2.md new file mode 100644 index 00000000000..bbe3747556f --- /dev/null +++ b/go/ql/src/change-notes/released/1.6.2.md @@ -0,0 +1,3 @@ +## 1.6.2 + +No user-facing changes. diff --git a/go/ql/src/codeql-pack.release.yml b/go/ql/src/codeql-pack.release.yml index ef7a789e0cf..5f5beb68311 100644 --- a/go/ql/src/codeql-pack.release.yml +++ b/go/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.6.1 +lastReleaseVersion: 1.6.2 diff --git a/go/ql/src/qlpack.yml b/go/ql/src/qlpack.yml index fa7e934382a..78c75459387 100644 --- a/go/ql/src/qlpack.yml +++ b/go/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/go-queries -version: 1.6.2-dev +version: 1.6.2 groups: - go - queries diff --git a/java/ql/lib/CHANGELOG.md b/java/ql/lib/CHANGELOG.md index 2d34c791c92..2187f00c399 100644 --- a/java/ql/lib/CHANGELOG.md +++ b/java/ql/lib/CHANGELOG.md @@ -1,3 +1,18 @@ +## 9.1.0 + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Java and Kotlin](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-java-and-kotlin/). + +### Minor Analysis Improvements + +* Added `sql-injection` sink models for the Hibernate `org.hibernate.query.QueryProducer` methods `createNativeMutationQuery`, `createMutationQuery`, and `createSelectionQuery`. +* The `java/partial-path-traversal` and `java/partial-path-traversal-from-remote` queries now correctly recognize file separator appends using `+=`. +* The `java/path-injection` and `java/zipslip` queries now recognize `Path.toRealPath()` as a path normalization sanitizer, consistent with the existing treatment of `Path.normalize()` and `File.getCanonicalPath()`. This reduces false positives for code that uses the NIO.2 API for path canonicalization. +* The `java/sensitive-log` query now excludes additional common variable naming patterns that do not hold sensitive data, reducing false positives. This includes pagination/iteration tokens (`nextToken`, `pageToken`, `continuationToken`), token metadata (`tokenType`, `tokenEndpoint`, `tokenCount`), and secret metadata (`secretName`, `secretId`, `secretVersion`). +* The `java/sensitive-log` query now treats method calls whose names contain "encrypt", "hash", or "digest" as sanitizers, consistent with the existing treatment in `java/cleartext-storage-in-log`. This reduces false positives when sensitive data is hashed or encrypted before logging. +* The `java/trust-boundary-violation` query now recognizes regular expression checks (including `String.matches()` guards and `@javax.validation.constraints.Pattern` annotations) as sanitizers, consistent with the existing treatment of ESAPI validators. This reduces false positives when input is validated against a pattern before being stored in a session. + ## 9.0.4 ### Minor Analysis Improvements diff --git a/java/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md b/java/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md deleted file mode 100644 index f8bcbb1fcb2..00000000000 --- a/java/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: feature ---- -* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Java and Kotlin](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-java-and-kotlin/). diff --git a/java/ql/lib/change-notes/2026-04-04-path-injection-torealpath.md b/java/ql/lib/change-notes/2026-04-04-path-injection-torealpath.md deleted file mode 100644 index 8856d419bce..00000000000 --- a/java/ql/lib/change-notes/2026-04-04-path-injection-torealpath.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* The `java/path-injection` and `java/zipslip` queries now recognize `Path.toRealPath()` as a path normalization sanitizer, consistent with the existing treatment of `Path.normalize()` and `File.getCanonicalPath()`. This reduces false positives for code that uses the NIO.2 API for path canonicalization. diff --git a/java/ql/lib/change-notes/2026-04-04-sensitive-log-fp-reduction.md b/java/ql/lib/change-notes/2026-04-04-sensitive-log-fp-reduction.md deleted file mode 100644 index 15fc811360b..00000000000 --- a/java/ql/lib/change-notes/2026-04-04-sensitive-log-fp-reduction.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* The `java/sensitive-log` query now excludes additional common variable naming patterns that do not hold sensitive data, reducing false positives. This includes pagination/iteration tokens (`nextToken`, `pageToken`, `continuationToken`), token metadata (`tokenType`, `tokenEndpoint`, `tokenCount`), and secret metadata (`secretName`, `secretId`, `secretVersion`). diff --git a/java/ql/lib/change-notes/2026-04-04-sensitive-log-hash-sanitizer.md b/java/ql/lib/change-notes/2026-04-04-sensitive-log-hash-sanitizer.md deleted file mode 100644 index 7323ab09737..00000000000 --- a/java/ql/lib/change-notes/2026-04-04-sensitive-log-hash-sanitizer.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* The `java/sensitive-log` query now treats method calls whose names contain "encrypt", "hash", or "digest" as sanitizers, consistent with the existing treatment in `java/cleartext-storage-in-log`. This reduces false positives when sensitive data is hashed or encrypted before logging. diff --git a/java/ql/lib/change-notes/2026-04-04-trust-boundary-regexp-barrier.md b/java/ql/lib/change-notes/2026-04-04-trust-boundary-regexp-barrier.md deleted file mode 100644 index b80c0611b6d..00000000000 --- a/java/ql/lib/change-notes/2026-04-04-trust-boundary-regexp-barrier.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* The `java/trust-boundary-violation` query now recognizes regular expression checks (including `String.matches()` guards and `@javax.validation.constraints.Pattern` annotations) as sanitizers, consistent with the existing treatment of ESAPI validators. This reduces false positives when input is validated against a pattern before being stored in a session. diff --git a/java/ql/lib/change-notes/2026-04-18-partial-path-traversal-fix.md b/java/ql/lib/change-notes/2026-04-18-partial-path-traversal-fix.md deleted file mode 100644 index 8c15a346552..00000000000 --- a/java/ql/lib/change-notes/2026-04-18-partial-path-traversal-fix.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* The `java/partial-path-traversal` and `java/partial-path-traversal-from-remote` queries now correctly recognize file separator appends using `+=`. diff --git a/java/ql/lib/change-notes/2026-04-23-hibernate-queryproducer-sinks.md b/java/ql/lib/change-notes/2026-04-23-hibernate-queryproducer-sinks.md deleted file mode 100644 index 018ce8d348e..00000000000 --- a/java/ql/lib/change-notes/2026-04-23-hibernate-queryproducer-sinks.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: minorAnalysis ---- -* Added `sql-injection` sink models for the Hibernate `org.hibernate.query.QueryProducer` methods `createNativeMutationQuery`, `createMutationQuery`, and `createSelectionQuery`. diff --git a/java/ql/lib/change-notes/released/9.1.0.md b/java/ql/lib/change-notes/released/9.1.0.md new file mode 100644 index 00000000000..aed1a85e63f --- /dev/null +++ b/java/ql/lib/change-notes/released/9.1.0.md @@ -0,0 +1,14 @@ +## 9.1.0 + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Java and Kotlin](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-java-and-kotlin/). + +### Minor Analysis Improvements + +* Added `sql-injection` sink models for the Hibernate `org.hibernate.query.QueryProducer` methods `createNativeMutationQuery`, `createMutationQuery`, and `createSelectionQuery`. +* The `java/partial-path-traversal` and `java/partial-path-traversal-from-remote` queries now correctly recognize file separator appends using `+=`. +* The `java/path-injection` and `java/zipslip` queries now recognize `Path.toRealPath()` as a path normalization sanitizer, consistent with the existing treatment of `Path.normalize()` and `File.getCanonicalPath()`. This reduces false positives for code that uses the NIO.2 API for path canonicalization. +* The `java/sensitive-log` query now excludes additional common variable naming patterns that do not hold sensitive data, reducing false positives. This includes pagination/iteration tokens (`nextToken`, `pageToken`, `continuationToken`), token metadata (`tokenType`, `tokenEndpoint`, `tokenCount`), and secret metadata (`secretName`, `secretId`, `secretVersion`). +* The `java/sensitive-log` query now treats method calls whose names contain "encrypt", "hash", or "digest" as sanitizers, consistent with the existing treatment in `java/cleartext-storage-in-log`. This reduces false positives when sensitive data is hashed or encrypted before logging. +* The `java/trust-boundary-violation` query now recognizes regular expression checks (including `String.matches()` guards and `@javax.validation.constraints.Pattern` annotations) as sanitizers, consistent with the existing treatment of ESAPI validators. This reduces false positives when input is validated against a pattern before being stored in a session. diff --git a/java/ql/lib/codeql-pack.release.yml b/java/ql/lib/codeql-pack.release.yml index 4bbe4f75b58..83ec2b42fcd 100644 --- a/java/ql/lib/codeql-pack.release.yml +++ b/java/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 9.0.4 +lastReleaseVersion: 9.1.0 diff --git a/java/ql/lib/qlpack.yml b/java/ql/lib/qlpack.yml index d256d2a84c1..e57412ee1fc 100644 --- a/java/ql/lib/qlpack.yml +++ b/java/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/java-all -version: 9.0.5-dev +version: 9.1.0 groups: java dbscheme: config/semmlecode.dbscheme extractor: java diff --git a/java/ql/src/CHANGELOG.md b/java/ql/src/CHANGELOG.md index 1b5d2bdad8a..fd13cbdcf83 100644 --- a/java/ql/src/CHANGELOG.md +++ b/java/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.11.2 + +No user-facing changes. + ## 1.11.1 No user-facing changes. diff --git a/java/ql/src/change-notes/released/1.11.2.md b/java/ql/src/change-notes/released/1.11.2.md new file mode 100644 index 00000000000..93a8b73f6b9 --- /dev/null +++ b/java/ql/src/change-notes/released/1.11.2.md @@ -0,0 +1,3 @@ +## 1.11.2 + +No user-facing changes. diff --git a/java/ql/src/codeql-pack.release.yml b/java/ql/src/codeql-pack.release.yml index 4ae123153bf..3e341cf85d5 100644 --- a/java/ql/src/codeql-pack.release.yml +++ b/java/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.11.1 +lastReleaseVersion: 1.11.2 diff --git a/java/ql/src/qlpack.yml b/java/ql/src/qlpack.yml index 2f2233460ba..bdaaf77ec9e 100644 --- a/java/ql/src/qlpack.yml +++ b/java/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/java-queries -version: 1.11.2-dev +version: 1.11.2 groups: - java - queries diff --git a/javascript/ql/lib/CHANGELOG.md b/javascript/ql/lib/CHANGELOG.md index 77837f46c5e..53708faed09 100644 --- a/javascript/ql/lib/CHANGELOG.md +++ b/javascript/ql/lib/CHANGELOG.md @@ -1,3 +1,10 @@ +## 2.7.0 + +### New Features + +* Added support for [`@vercel/node`](https://www.npmjs.com/package/@vercel/node) Vercel serverless functions. Handlers are recognized via the `VercelRequest`/`VercelResponse` TypeScript parameter types, and standard security queries (`js/reflected-xss`, `js/request-forgery`, `js/sql-injection`, `js/command-line-injection`, etc.) now detect vulnerabilities in Vercel API route files. +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for JavaScript](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/). + ## 2.6.28 No user-facing changes. diff --git a/javascript/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md b/javascript/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md deleted file mode 100644 index d849f4c0c69..00000000000 --- a/javascript/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: feature ---- -* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for JavaScript](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/). diff --git a/javascript/ql/lib/change-notes/2026-04-12-vercel-node.md b/javascript/ql/lib/change-notes/released/2.7.0.md similarity index 58% rename from javascript/ql/lib/change-notes/2026-04-12-vercel-node.md rename to javascript/ql/lib/change-notes/released/2.7.0.md index 39802258b02..c6f46c8c0d4 100644 --- a/javascript/ql/lib/change-notes/2026-04-12-vercel-node.md +++ b/javascript/ql/lib/change-notes/released/2.7.0.md @@ -1,4 +1,6 @@ ---- -category: feature ---- +## 2.7.0 + +### New Features + * Added support for [`@vercel/node`](https://www.npmjs.com/package/@vercel/node) Vercel serverless functions. Handlers are recognized via the `VercelRequest`/`VercelResponse` TypeScript parameter types, and standard security queries (`js/reflected-xss`, `js/request-forgery`, `js/sql-injection`, `js/command-line-injection`, etc.) now detect vulnerabilities in Vercel API route files. +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for JavaScript](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/). diff --git a/javascript/ql/lib/codeql-pack.release.yml b/javascript/ql/lib/codeql-pack.release.yml index 2456457874e..6a6c87f537d 100644 --- a/javascript/ql/lib/codeql-pack.release.yml +++ b/javascript/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 2.6.28 +lastReleaseVersion: 2.7.0 diff --git a/javascript/ql/lib/qlpack.yml b/javascript/ql/lib/qlpack.yml index b62abbbe101..97c3de6a8eb 100644 --- a/javascript/ql/lib/qlpack.yml +++ b/javascript/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/javascript-all -version: 2.6.29-dev +version: 2.7.0 groups: javascript dbscheme: semmlecode.javascript.dbscheme extractor: javascript diff --git a/javascript/ql/src/CHANGELOG.md b/javascript/ql/src/CHANGELOG.md index 9b122364ffa..615c030de52 100644 --- a/javascript/ql/src/CHANGELOG.md +++ b/javascript/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.3.9 + +No user-facing changes. + ## 2.3.8 ### Minor Analysis Improvements diff --git a/javascript/ql/src/change-notes/released/2.3.9.md b/javascript/ql/src/change-notes/released/2.3.9.md new file mode 100644 index 00000000000..fac422eb6b8 --- /dev/null +++ b/javascript/ql/src/change-notes/released/2.3.9.md @@ -0,0 +1,3 @@ +## 2.3.9 + +No user-facing changes. diff --git a/javascript/ql/src/codeql-pack.release.yml b/javascript/ql/src/codeql-pack.release.yml index c68b70cb8be..079eb5bfb0c 100644 --- a/javascript/ql/src/codeql-pack.release.yml +++ b/javascript/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 2.3.8 +lastReleaseVersion: 2.3.9 diff --git a/javascript/ql/src/qlpack.yml b/javascript/ql/src/qlpack.yml index 9081791d0e0..417df72e5ba 100644 --- a/javascript/ql/src/qlpack.yml +++ b/javascript/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/javascript-queries -version: 2.3.9-dev +version: 2.3.9 groups: - javascript - queries diff --git a/misc/suite-helpers/CHANGELOG.md b/misc/suite-helpers/CHANGELOG.md index 897533f6450..d6d070a91bb 100644 --- a/misc/suite-helpers/CHANGELOG.md +++ b/misc/suite-helpers/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/misc/suite-helpers/change-notes/released/1.0.49.md b/misc/suite-helpers/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/misc/suite-helpers/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/misc/suite-helpers/codeql-pack.release.yml b/misc/suite-helpers/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/misc/suite-helpers/codeql-pack.release.yml +++ b/misc/suite-helpers/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/misc/suite-helpers/qlpack.yml b/misc/suite-helpers/qlpack.yml index 778284fbe9a..52fc453aa32 100644 --- a/misc/suite-helpers/qlpack.yml +++ b/misc/suite-helpers/qlpack.yml @@ -1,4 +1,4 @@ name: codeql/suite-helpers -version: 1.0.49-dev +version: 1.0.49 groups: shared warnOnImplicitThis: true diff --git a/python/ql/lib/CHANGELOG.md b/python/ql/lib/CHANGELOG.md index 69fa60a6675..8ede35e9bdc 100644 --- a/python/ql/lib/CHANGELOG.md +++ b/python/ql/lib/CHANGELOG.md @@ -1,3 +1,13 @@ +## 7.1.0 + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Python](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-python/). + +### Minor Analysis Improvements + +- The Python extractor now supports unpacking in comprehensions, e.g. `[*x for x in nested]` (as defined in [PEP-798](https://peps.python.org/pep-0798/)) that will be part of Python 3.15. + ## 7.0.5 ### Minor Analysis Improvements diff --git a/python/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md b/python/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md deleted file mode 100644 index 522801a0e46..00000000000 --- a/python/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: feature ---- -* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Python](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-python/). diff --git a/python/ql/lib/change-notes/2026-04-10-support-comprehension-unpacking.md b/python/ql/lib/change-notes/2026-04-10-support-comprehension-unpacking.md deleted file mode 100644 index d7406d0a606..00000000000 --- a/python/ql/lib/change-notes/2026-04-10-support-comprehension-unpacking.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -category: minorAnalysis ---- - -- The Python extractor now supports unpacking in comprehensions, e.g. `[*x for x in nested]` (as defined in [PEP-798](https://peps.python.org/pep-0798/)) that will be part of Python 3.15. diff --git a/python/ql/lib/change-notes/released/7.1.0.md b/python/ql/lib/change-notes/released/7.1.0.md new file mode 100644 index 00000000000..553b5fe4cee --- /dev/null +++ b/python/ql/lib/change-notes/released/7.1.0.md @@ -0,0 +1,9 @@ +## 7.1.0 + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Python](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-python/). + +### Minor Analysis Improvements + +- The Python extractor now supports unpacking in comprehensions, e.g. `[*x for x in nested]` (as defined in [PEP-798](https://peps.python.org/pep-0798/)) that will be part of Python 3.15. diff --git a/python/ql/lib/codeql-pack.release.yml b/python/ql/lib/codeql-pack.release.yml index 2cff21d59fe..dcaaa76112a 100644 --- a/python/ql/lib/codeql-pack.release.yml +++ b/python/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 7.0.5 +lastReleaseVersion: 7.1.0 diff --git a/python/ql/lib/qlpack.yml b/python/ql/lib/qlpack.yml index 8564a098594..2cd96a3e443 100644 --- a/python/ql/lib/qlpack.yml +++ b/python/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/python-all -version: 7.0.6-dev +version: 7.1.0 groups: python dbscheme: semmlecode.python.dbscheme extractor: python diff --git a/python/ql/src/CHANGELOG.md b/python/ql/src/CHANGELOG.md index 38018f09856..8676d754d01 100644 --- a/python/ql/src/CHANGELOG.md +++ b/python/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.8.2 + +No user-facing changes. + ## 1.8.1 ### Minor Analysis Improvements diff --git a/python/ql/src/change-notes/released/1.8.2.md b/python/ql/src/change-notes/released/1.8.2.md new file mode 100644 index 00000000000..12e641fd720 --- /dev/null +++ b/python/ql/src/change-notes/released/1.8.2.md @@ -0,0 +1,3 @@ +## 1.8.2 + +No user-facing changes. diff --git a/python/ql/src/codeql-pack.release.yml b/python/ql/src/codeql-pack.release.yml index 28a7c123ae8..559af8348bb 100644 --- a/python/ql/src/codeql-pack.release.yml +++ b/python/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.8.1 +lastReleaseVersion: 1.8.2 diff --git a/python/ql/src/qlpack.yml b/python/ql/src/qlpack.yml index 2d99bcd0c7a..46e7203a953 100644 --- a/python/ql/src/qlpack.yml +++ b/python/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/python-queries -version: 1.8.2-dev +version: 1.8.2 groups: - python - queries diff --git a/ruby/ql/lib/CHANGELOG.md b/ruby/ql/lib/CHANGELOG.md index 8315b641369..e8dd84283c6 100644 --- a/ruby/ql/lib/CHANGELOG.md +++ b/ruby/ql/lib/CHANGELOG.md @@ -1,3 +1,9 @@ +## 5.2.0 + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Ruby](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-ruby/). + ## 5.1.16 No user-facing changes. diff --git a/ruby/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md b/ruby/ql/lib/change-notes/released/5.2.0.md similarity index 89% rename from ruby/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md rename to ruby/ql/lib/change-notes/released/5.2.0.md index da53d584e11..c17c834f18d 100644 --- a/ruby/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md +++ b/ruby/ql/lib/change-notes/released/5.2.0.md @@ -1,4 +1,5 @@ ---- -category: feature ---- +## 5.2.0 + +### New Features + * Data flow barriers and barrier guards can now be added using data extensions. For more information see [Customizing library models for Ruby](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-ruby/). diff --git a/ruby/ql/lib/codeql-pack.release.yml b/ruby/ql/lib/codeql-pack.release.yml index 735f742e9af..9e57a36a7dc 100644 --- a/ruby/ql/lib/codeql-pack.release.yml +++ b/ruby/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 5.1.16 +lastReleaseVersion: 5.2.0 diff --git a/ruby/ql/lib/qlpack.yml b/ruby/ql/lib/qlpack.yml index 1ac5090098a..261a9890d44 100644 --- a/ruby/ql/lib/qlpack.yml +++ b/ruby/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/ruby-all -version: 5.1.17-dev +version: 5.2.0 groups: ruby extractor: ruby dbscheme: ruby.dbscheme diff --git a/ruby/ql/src/CHANGELOG.md b/ruby/ql/src/CHANGELOG.md index 5266fc1d5d9..927a40e596d 100644 --- a/ruby/ql/src/CHANGELOG.md +++ b/ruby/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.6.2 + +No user-facing changes. + ## 1.6.1 No user-facing changes. diff --git a/ruby/ql/src/change-notes/released/1.6.2.md b/ruby/ql/src/change-notes/released/1.6.2.md new file mode 100644 index 00000000000..bbe3747556f --- /dev/null +++ b/ruby/ql/src/change-notes/released/1.6.2.md @@ -0,0 +1,3 @@ +## 1.6.2 + +No user-facing changes. diff --git a/ruby/ql/src/codeql-pack.release.yml b/ruby/ql/src/codeql-pack.release.yml index ef7a789e0cf..5f5beb68311 100644 --- a/ruby/ql/src/codeql-pack.release.yml +++ b/ruby/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.6.1 +lastReleaseVersion: 1.6.2 diff --git a/ruby/ql/src/qlpack.yml b/ruby/ql/src/qlpack.yml index 978102bb82a..34f5d14c39c 100644 --- a/ruby/ql/src/qlpack.yml +++ b/ruby/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/ruby-queries -version: 1.6.2-dev +version: 1.6.2 groups: - ruby - queries diff --git a/rust/ql/lib/CHANGELOG.md b/rust/ql/lib/CHANGELOG.md index 8e515660f29..30ae7d73b67 100644 --- a/rust/ql/lib/CHANGELOG.md +++ b/rust/ql/lib/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.2.13 + +### New Features + +* Data flow barriers and barrier guards can now be added using data extensions. + ## 0.2.12 No user-facing changes. diff --git a/rust/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md b/rust/ql/lib/change-notes/released/0.2.13.md similarity index 73% rename from rust/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md rename to rust/ql/lib/change-notes/released/0.2.13.md index 5e97a1533a9..9c390c9ca09 100644 --- a/rust/ql/lib/change-notes/2026-03-20-data-extensions-barriers.md +++ b/rust/ql/lib/change-notes/released/0.2.13.md @@ -1,4 +1,5 @@ ---- -category: feature ---- +## 0.2.13 + +### New Features + * Data flow barriers and barrier guards can now be added using data extensions. diff --git a/rust/ql/lib/codeql-pack.release.yml b/rust/ql/lib/codeql-pack.release.yml index da1cea93393..979eb20092e 100644 --- a/rust/ql/lib/codeql-pack.release.yml +++ b/rust/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 0.2.12 +lastReleaseVersion: 0.2.13 diff --git a/rust/ql/lib/qlpack.yml b/rust/ql/lib/qlpack.yml index 7eb159e4b50..96b825fd949 100644 --- a/rust/ql/lib/qlpack.yml +++ b/rust/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/rust-all -version: 0.2.13-dev +version: 0.2.13 groups: rust extractor: rust dbscheme: rust.dbscheme diff --git a/rust/ql/src/CHANGELOG.md b/rust/ql/src/CHANGELOG.md index 14034c9877d..4b735337c90 100644 --- a/rust/ql/src/CHANGELOG.md +++ b/rust/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.1.34 + +No user-facing changes. + ## 0.1.33 No user-facing changes. diff --git a/rust/ql/src/change-notes/released/0.1.34.md b/rust/ql/src/change-notes/released/0.1.34.md new file mode 100644 index 00000000000..a3a4edb1e1d --- /dev/null +++ b/rust/ql/src/change-notes/released/0.1.34.md @@ -0,0 +1,3 @@ +## 0.1.34 + +No user-facing changes. diff --git a/rust/ql/src/codeql-pack.release.yml b/rust/ql/src/codeql-pack.release.yml index d9c9e819daa..a1d4333b19b 100644 --- a/rust/ql/src/codeql-pack.release.yml +++ b/rust/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 0.1.33 +lastReleaseVersion: 0.1.34 diff --git a/rust/ql/src/qlpack.yml b/rust/ql/src/qlpack.yml index 7b2bd73728a..3fde632f1ac 100644 --- a/rust/ql/src/qlpack.yml +++ b/rust/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/rust-queries -version: 0.1.34-dev +version: 0.1.34 groups: - rust - queries diff --git a/shared/concepts/CHANGELOG.md b/shared/concepts/CHANGELOG.md index e8b920847e9..59942922ea0 100644 --- a/shared/concepts/CHANGELOG.md +++ b/shared/concepts/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.0.23 + +No user-facing changes. + ## 0.0.22 No user-facing changes. diff --git a/shared/concepts/change-notes/released/0.0.23.md b/shared/concepts/change-notes/released/0.0.23.md new file mode 100644 index 00000000000..e89a1284bb8 --- /dev/null +++ b/shared/concepts/change-notes/released/0.0.23.md @@ -0,0 +1,3 @@ +## 0.0.23 + +No user-facing changes. diff --git a/shared/concepts/codeql-pack.release.yml b/shared/concepts/codeql-pack.release.yml index 11aaa2243f5..cc2195603d8 100644 --- a/shared/concepts/codeql-pack.release.yml +++ b/shared/concepts/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 0.0.22 +lastReleaseVersion: 0.0.23 diff --git a/shared/concepts/qlpack.yml b/shared/concepts/qlpack.yml index 947826f7dfd..95f898e6a70 100644 --- a/shared/concepts/qlpack.yml +++ b/shared/concepts/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/concepts -version: 0.0.23-dev +version: 0.0.23 groups: shared library: true dependencies: diff --git a/shared/controlflow/CHANGELOG.md b/shared/controlflow/CHANGELOG.md index df00c6146d8..9c6d7c09d17 100644 --- a/shared/controlflow/CHANGELOG.md +++ b/shared/controlflow/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.0.33 + +No user-facing changes. + ## 2.0.32 No user-facing changes. diff --git a/shared/controlflow/change-notes/released/2.0.33.md b/shared/controlflow/change-notes/released/2.0.33.md new file mode 100644 index 00000000000..d33a61332cf --- /dev/null +++ b/shared/controlflow/change-notes/released/2.0.33.md @@ -0,0 +1,3 @@ +## 2.0.33 + +No user-facing changes. diff --git a/shared/controlflow/codeql-pack.release.yml b/shared/controlflow/codeql-pack.release.yml index 483a0d5db8e..92e23200b4d 100644 --- a/shared/controlflow/codeql-pack.release.yml +++ b/shared/controlflow/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 2.0.32 +lastReleaseVersion: 2.0.33 diff --git a/shared/controlflow/qlpack.yml b/shared/controlflow/qlpack.yml index adc4aedc5c3..fa246d14d69 100644 --- a/shared/controlflow/qlpack.yml +++ b/shared/controlflow/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/controlflow -version: 2.0.33-dev +version: 2.0.33 groups: shared library: true dependencies: diff --git a/shared/dataflow/CHANGELOG.md b/shared/dataflow/CHANGELOG.md index ed60239f3de..9e4ca0a0ea8 100644 --- a/shared/dataflow/CHANGELOG.md +++ b/shared/dataflow/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.1.5 + +No user-facing changes. + ## 2.1.4 No user-facing changes. diff --git a/shared/dataflow/change-notes/released/2.1.5.md b/shared/dataflow/change-notes/released/2.1.5.md new file mode 100644 index 00000000000..7e559ea5dd0 --- /dev/null +++ b/shared/dataflow/change-notes/released/2.1.5.md @@ -0,0 +1,3 @@ +## 2.1.5 + +No user-facing changes. diff --git a/shared/dataflow/codeql-pack.release.yml b/shared/dataflow/codeql-pack.release.yml index 896b46fda9b..a890ff0111c 100644 --- a/shared/dataflow/codeql-pack.release.yml +++ b/shared/dataflow/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 2.1.4 +lastReleaseVersion: 2.1.5 diff --git a/shared/dataflow/qlpack.yml b/shared/dataflow/qlpack.yml index a18b746e4b4..700651f8de6 100644 --- a/shared/dataflow/qlpack.yml +++ b/shared/dataflow/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/dataflow -version: 2.1.5-dev +version: 2.1.5 groups: shared library: true dependencies: diff --git a/shared/mad/CHANGELOG.md b/shared/mad/CHANGELOG.md index ff868403d0c..5d5551d10a3 100644 --- a/shared/mad/CHANGELOG.md +++ b/shared/mad/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/mad/change-notes/released/1.0.49.md b/shared/mad/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/mad/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/mad/codeql-pack.release.yml b/shared/mad/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/mad/codeql-pack.release.yml +++ b/shared/mad/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/mad/qlpack.yml b/shared/mad/qlpack.yml index dd5fcf54034..472719d4127 100644 --- a/shared/mad/qlpack.yml +++ b/shared/mad/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/mad -version: 1.0.49-dev +version: 1.0.49 groups: shared library: true dependencies: diff --git a/shared/quantum/CHANGELOG.md b/shared/quantum/CHANGELOG.md index eccc65c6041..be06e2108a8 100644 --- a/shared/quantum/CHANGELOG.md +++ b/shared/quantum/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.0.27 + +No user-facing changes. + ## 0.0.26 No user-facing changes. diff --git a/shared/quantum/change-notes/released/0.0.27.md b/shared/quantum/change-notes/released/0.0.27.md new file mode 100644 index 00000000000..ff6e274427b --- /dev/null +++ b/shared/quantum/change-notes/released/0.0.27.md @@ -0,0 +1,3 @@ +## 0.0.27 + +No user-facing changes. diff --git a/shared/quantum/codeql-pack.release.yml b/shared/quantum/codeql-pack.release.yml index c576d2d7db2..dbab90d6989 100644 --- a/shared/quantum/codeql-pack.release.yml +++ b/shared/quantum/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 0.0.26 +lastReleaseVersion: 0.0.27 diff --git a/shared/quantum/qlpack.yml b/shared/quantum/qlpack.yml index c4e5d41dfaa..d29cac4faa1 100644 --- a/shared/quantum/qlpack.yml +++ b/shared/quantum/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/quantum -version: 0.0.27-dev +version: 0.0.27 groups: shared library: true dependencies: diff --git a/shared/rangeanalysis/CHANGELOG.md b/shared/rangeanalysis/CHANGELOG.md index 9afb612f18a..8b457ef5927 100644 --- a/shared/rangeanalysis/CHANGELOG.md +++ b/shared/rangeanalysis/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/rangeanalysis/change-notes/released/1.0.49.md b/shared/rangeanalysis/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/rangeanalysis/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/rangeanalysis/codeql-pack.release.yml b/shared/rangeanalysis/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/rangeanalysis/codeql-pack.release.yml +++ b/shared/rangeanalysis/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/rangeanalysis/qlpack.yml b/shared/rangeanalysis/qlpack.yml index ed3b4a66239..4261dfb4991 100644 --- a/shared/rangeanalysis/qlpack.yml +++ b/shared/rangeanalysis/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/rangeanalysis -version: 1.0.49-dev +version: 1.0.49 groups: shared library: true dependencies: diff --git a/shared/regex/CHANGELOG.md b/shared/regex/CHANGELOG.md index 2375b7b56ab..639cede00af 100644 --- a/shared/regex/CHANGELOG.md +++ b/shared/regex/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/regex/change-notes/released/1.0.49.md b/shared/regex/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/regex/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/regex/codeql-pack.release.yml b/shared/regex/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/regex/codeql-pack.release.yml +++ b/shared/regex/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/regex/qlpack.yml b/shared/regex/qlpack.yml index 3d569c7d429..3d6feafbf39 100644 --- a/shared/regex/qlpack.yml +++ b/shared/regex/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/regex -version: 1.0.49-dev +version: 1.0.49 groups: shared library: true dependencies: diff --git a/shared/ssa/CHANGELOG.md b/shared/ssa/CHANGELOG.md index dd21ba6d38b..9cfbb004657 100644 --- a/shared/ssa/CHANGELOG.md +++ b/shared/ssa/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.0.25 + +No user-facing changes. + ## 2.0.24 No user-facing changes. diff --git a/shared/ssa/change-notes/released/2.0.25.md b/shared/ssa/change-notes/released/2.0.25.md new file mode 100644 index 00000000000..ca39dd50c69 --- /dev/null +++ b/shared/ssa/change-notes/released/2.0.25.md @@ -0,0 +1,3 @@ +## 2.0.25 + +No user-facing changes. diff --git a/shared/ssa/codeql-pack.release.yml b/shared/ssa/codeql-pack.release.yml index 1460df314d5..f54d8620118 100644 --- a/shared/ssa/codeql-pack.release.yml +++ b/shared/ssa/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 2.0.24 +lastReleaseVersion: 2.0.25 diff --git a/shared/ssa/qlpack.yml b/shared/ssa/qlpack.yml index c1fd261e070..517a79c557c 100644 --- a/shared/ssa/qlpack.yml +++ b/shared/ssa/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/ssa -version: 2.0.25-dev +version: 2.0.25 groups: shared library: true dependencies: diff --git a/shared/threat-models/CHANGELOG.md b/shared/threat-models/CHANGELOG.md index a3aa00d4872..a6b6055373a 100644 --- a/shared/threat-models/CHANGELOG.md +++ b/shared/threat-models/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/threat-models/change-notes/released/1.0.49.md b/shared/threat-models/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/threat-models/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/threat-models/codeql-pack.release.yml b/shared/threat-models/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/threat-models/codeql-pack.release.yml +++ b/shared/threat-models/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/threat-models/qlpack.yml b/shared/threat-models/qlpack.yml index 59ce8c06727..71e6b70a313 100644 --- a/shared/threat-models/qlpack.yml +++ b/shared/threat-models/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/threat-models -version: 1.0.49-dev +version: 1.0.49 library: true groups: shared dataExtensions: diff --git a/shared/tutorial/CHANGELOG.md b/shared/tutorial/CHANGELOG.md index 9350e8a04eb..5fcacc0b8b5 100644 --- a/shared/tutorial/CHANGELOG.md +++ b/shared/tutorial/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/tutorial/change-notes/released/1.0.49.md b/shared/tutorial/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/tutorial/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/tutorial/codeql-pack.release.yml b/shared/tutorial/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/tutorial/codeql-pack.release.yml +++ b/shared/tutorial/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/tutorial/qlpack.yml b/shared/tutorial/qlpack.yml index 36b8181e0bf..c1df7d67b85 100644 --- a/shared/tutorial/qlpack.yml +++ b/shared/tutorial/qlpack.yml @@ -1,7 +1,7 @@ name: codeql/tutorial description: Library for the CodeQL detective tutorials, helping new users learn to write CodeQL queries. -version: 1.0.49-dev +version: 1.0.49 groups: shared library: true warnOnImplicitThis: true diff --git a/shared/typeflow/CHANGELOG.md b/shared/typeflow/CHANGELOG.md index 035c2aa456e..fbfdb431161 100644 --- a/shared/typeflow/CHANGELOG.md +++ b/shared/typeflow/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/typeflow/change-notes/released/1.0.49.md b/shared/typeflow/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/typeflow/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/typeflow/codeql-pack.release.yml b/shared/typeflow/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/typeflow/codeql-pack.release.yml +++ b/shared/typeflow/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/typeflow/qlpack.yml b/shared/typeflow/qlpack.yml index 0734b2b722e..2f22c6655a5 100644 --- a/shared/typeflow/qlpack.yml +++ b/shared/typeflow/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/typeflow -version: 1.0.49-dev +version: 1.0.49 groups: shared library: true dependencies: diff --git a/shared/typeinference/CHANGELOG.md b/shared/typeinference/CHANGELOG.md index c8b656e4f35..1652285654a 100644 --- a/shared/typeinference/CHANGELOG.md +++ b/shared/typeinference/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.0.30 + +No user-facing changes. + ## 0.0.29 No user-facing changes. diff --git a/shared/typeinference/change-notes/released/0.0.30.md b/shared/typeinference/change-notes/released/0.0.30.md new file mode 100644 index 00000000000..10c7a0c5c13 --- /dev/null +++ b/shared/typeinference/change-notes/released/0.0.30.md @@ -0,0 +1,3 @@ +## 0.0.30 + +No user-facing changes. diff --git a/shared/typeinference/codeql-pack.release.yml b/shared/typeinference/codeql-pack.release.yml index c81f1813120..0c61b463bab 100644 --- a/shared/typeinference/codeql-pack.release.yml +++ b/shared/typeinference/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 0.0.29 +lastReleaseVersion: 0.0.30 diff --git a/shared/typeinference/qlpack.yml b/shared/typeinference/qlpack.yml index 2bf5c49d97e..039107b5ef9 100644 --- a/shared/typeinference/qlpack.yml +++ b/shared/typeinference/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/typeinference -version: 0.0.30-dev +version: 0.0.30 groups: shared library: true dependencies: diff --git a/shared/typetracking/CHANGELOG.md b/shared/typetracking/CHANGELOG.md index 75d8938e6a1..ecdded5ceda 100644 --- a/shared/typetracking/CHANGELOG.md +++ b/shared/typetracking/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.0.33 + +No user-facing changes. + ## 2.0.32 No user-facing changes. diff --git a/shared/typetracking/change-notes/released/2.0.33.md b/shared/typetracking/change-notes/released/2.0.33.md new file mode 100644 index 00000000000..d33a61332cf --- /dev/null +++ b/shared/typetracking/change-notes/released/2.0.33.md @@ -0,0 +1,3 @@ +## 2.0.33 + +No user-facing changes. diff --git a/shared/typetracking/codeql-pack.release.yml b/shared/typetracking/codeql-pack.release.yml index 483a0d5db8e..92e23200b4d 100644 --- a/shared/typetracking/codeql-pack.release.yml +++ b/shared/typetracking/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 2.0.32 +lastReleaseVersion: 2.0.33 diff --git a/shared/typetracking/qlpack.yml b/shared/typetracking/qlpack.yml index fe35cf5955b..4f8f21fd569 100644 --- a/shared/typetracking/qlpack.yml +++ b/shared/typetracking/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/typetracking -version: 2.0.33-dev +version: 2.0.33 groups: shared library: true dependencies: diff --git a/shared/typos/CHANGELOG.md b/shared/typos/CHANGELOG.md index 35825098a63..617fa5638b4 100644 --- a/shared/typos/CHANGELOG.md +++ b/shared/typos/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/typos/change-notes/released/1.0.49.md b/shared/typos/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/typos/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/typos/codeql-pack.release.yml b/shared/typos/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/typos/codeql-pack.release.yml +++ b/shared/typos/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/typos/qlpack.yml b/shared/typos/qlpack.yml index a8c85168f20..441404e19f3 100644 --- a/shared/typos/qlpack.yml +++ b/shared/typos/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/typos -version: 1.0.49-dev +version: 1.0.49 groups: shared library: true warnOnImplicitThis: true diff --git a/shared/util/CHANGELOG.md b/shared/util/CHANGELOG.md index d1becc8ba2c..51488029e96 100644 --- a/shared/util/CHANGELOG.md +++ b/shared/util/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.0.36 + +No user-facing changes. + ## 2.0.35 No user-facing changes. diff --git a/shared/util/change-notes/released/2.0.36.md b/shared/util/change-notes/released/2.0.36.md new file mode 100644 index 00000000000..8acdd12366e --- /dev/null +++ b/shared/util/change-notes/released/2.0.36.md @@ -0,0 +1,3 @@ +## 2.0.36 + +No user-facing changes. diff --git a/shared/util/codeql-pack.release.yml b/shared/util/codeql-pack.release.yml index 27eb8ef8ece..7e4aaa0dd67 100644 --- a/shared/util/codeql-pack.release.yml +++ b/shared/util/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 2.0.35 +lastReleaseVersion: 2.0.36 diff --git a/shared/util/qlpack.yml b/shared/util/qlpack.yml index 99f8c5374dc..f3b6b7f3ff8 100644 --- a/shared/util/qlpack.yml +++ b/shared/util/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/util -version: 2.0.36-dev +version: 2.0.36 groups: shared library: true dependencies: null diff --git a/shared/xml/CHANGELOG.md b/shared/xml/CHANGELOG.md index 131bf7afd2a..9f60f66ff72 100644 --- a/shared/xml/CHANGELOG.md +++ b/shared/xml/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/xml/change-notes/released/1.0.49.md b/shared/xml/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/xml/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/xml/codeql-pack.release.yml b/shared/xml/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/xml/codeql-pack.release.yml +++ b/shared/xml/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/xml/qlpack.yml b/shared/xml/qlpack.yml index 2c44df63e7e..718c36108f6 100644 --- a/shared/xml/qlpack.yml +++ b/shared/xml/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/xml -version: 1.0.49-dev +version: 1.0.49 groups: shared library: true dependencies: diff --git a/shared/yaml/CHANGELOG.md b/shared/yaml/CHANGELOG.md index 1c420b31355..a13e3308874 100644 --- a/shared/yaml/CHANGELOG.md +++ b/shared/yaml/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.0.49 + +No user-facing changes. + ## 1.0.48 No user-facing changes. diff --git a/shared/yaml/change-notes/released/1.0.49.md b/shared/yaml/change-notes/released/1.0.49.md new file mode 100644 index 00000000000..df67fb8cc76 --- /dev/null +++ b/shared/yaml/change-notes/released/1.0.49.md @@ -0,0 +1,3 @@ +## 1.0.49 + +No user-facing changes. diff --git a/shared/yaml/codeql-pack.release.yml b/shared/yaml/codeql-pack.release.yml index 6db79f2c397..596617977df 100644 --- a/shared/yaml/codeql-pack.release.yml +++ b/shared/yaml/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.0.48 +lastReleaseVersion: 1.0.49 diff --git a/shared/yaml/qlpack.yml b/shared/yaml/qlpack.yml index 6778ee5a156..4bb6e173ab6 100644 --- a/shared/yaml/qlpack.yml +++ b/shared/yaml/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/yaml -version: 1.0.49-dev +version: 1.0.49 groups: shared library: true warnOnImplicitThis: true diff --git a/swift/ql/lib/CHANGELOG.md b/swift/ql/lib/CHANGELOG.md index e2cb45f9769..e7979dbf0ed 100644 --- a/swift/ql/lib/CHANGELOG.md +++ b/swift/ql/lib/CHANGELOG.md @@ -1,3 +1,13 @@ +## 6.5.0 + +### New Features + +* The `BuiltinFixedArrayType` class now defines the predicates `getSize` and `getElementType`, which yield the size of the array and the type of elements stored in the array, respectively. + +### Major Analysis Improvements + +* Upgraded to allow analysis of Swift 6.3.1. + ## 6.4.0 ### Major Analysis Improvements diff --git a/swift/ql/lib/change-notes/2026-04-20-swift-6.3.1.md b/swift/ql/lib/change-notes/2026-04-20-swift-6.3.1.md deleted file mode 100644 index acc4bc73861..00000000000 --- a/swift/ql/lib/change-notes/2026-04-20-swift-6.3.1.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: majorAnalysis ---- -* Upgraded to allow analysis of Swift 6.3.1. diff --git a/swift/ql/lib/change-notes/2026-04-17-fixed-array.md b/swift/ql/lib/change-notes/released/6.5.0.md similarity index 63% rename from swift/ql/lib/change-notes/2026-04-17-fixed-array.md rename to swift/ql/lib/change-notes/released/6.5.0.md index 3fd91627544..5b390d1bfd4 100644 --- a/swift/ql/lib/change-notes/2026-04-17-fixed-array.md +++ b/swift/ql/lib/change-notes/released/6.5.0.md @@ -1,4 +1,9 @@ ---- -category: feature ---- +## 6.5.0 + +### New Features + * The `BuiltinFixedArrayType` class now defines the predicates `getSize` and `getElementType`, which yield the size of the array and the type of elements stored in the array, respectively. + +### Major Analysis Improvements + +* Upgraded to allow analysis of Swift 6.3.1. diff --git a/swift/ql/lib/codeql-pack.release.yml b/swift/ql/lib/codeql-pack.release.yml index 3098c5db6c3..2813c8e210f 100644 --- a/swift/ql/lib/codeql-pack.release.yml +++ b/swift/ql/lib/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 6.4.0 +lastReleaseVersion: 6.5.0 diff --git a/swift/ql/lib/qlpack.yml b/swift/ql/lib/qlpack.yml index 595a2804df5..ff088e209fa 100644 --- a/swift/ql/lib/qlpack.yml +++ b/swift/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/swift-all -version: 6.4.1-dev +version: 6.5.0 groups: swift extractor: swift dbscheme: swift.dbscheme diff --git a/swift/ql/src/CHANGELOG.md b/swift/ql/src/CHANGELOG.md index af70cebc1e4..f7b81101037 100644 --- a/swift/ql/src/CHANGELOG.md +++ b/swift/ql/src/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.3.2 + +No user-facing changes. + ## 1.3.1 No user-facing changes. diff --git a/swift/ql/src/change-notes/released/1.3.2.md b/swift/ql/src/change-notes/released/1.3.2.md new file mode 100644 index 00000000000..14f14807ef5 --- /dev/null +++ b/swift/ql/src/change-notes/released/1.3.2.md @@ -0,0 +1,3 @@ +## 1.3.2 + +No user-facing changes. diff --git a/swift/ql/src/codeql-pack.release.yml b/swift/ql/src/codeql-pack.release.yml index e71b6d081f1..86a9cb32d86 100644 --- a/swift/ql/src/codeql-pack.release.yml +++ b/swift/ql/src/codeql-pack.release.yml @@ -1,2 +1,2 @@ --- -lastReleaseVersion: 1.3.1 +lastReleaseVersion: 1.3.2 diff --git a/swift/ql/src/qlpack.yml b/swift/ql/src/qlpack.yml index 6b4dc1f65e5..26d9123f050 100644 --- a/swift/ql/src/qlpack.yml +++ b/swift/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/swift-queries -version: 1.3.2-dev +version: 1.3.2 groups: - swift - queries From 76102771996713aaa935d824f32575d37b983109 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 5 May 2026 10:10:06 +0000 Subject: [PATCH 02/11] Post-release preparation for codeql-cli-2.25.4 --- actions/ql/lib/qlpack.yml | 2 +- actions/ql/src/qlpack.yml | 2 +- cpp/ql/lib/qlpack.yml | 2 +- cpp/ql/src/qlpack.yml | 2 +- csharp/ql/campaigns/Solorigate/lib/qlpack.yml | 2 +- csharp/ql/campaigns/Solorigate/src/qlpack.yml | 2 +- csharp/ql/lib/qlpack.yml | 2 +- csharp/ql/src/qlpack.yml | 2 +- go/ql/consistency-queries/qlpack.yml | 2 +- go/ql/lib/qlpack.yml | 2 +- go/ql/src/qlpack.yml | 2 +- java/ql/lib/qlpack.yml | 2 +- java/ql/src/qlpack.yml | 2 +- javascript/ql/lib/qlpack.yml | 2 +- javascript/ql/src/qlpack.yml | 2 +- misc/suite-helpers/qlpack.yml | 2 +- python/ql/lib/qlpack.yml | 2 +- python/ql/src/qlpack.yml | 2 +- ruby/ql/lib/qlpack.yml | 2 +- ruby/ql/src/qlpack.yml | 2 +- rust/ql/lib/qlpack.yml | 2 +- rust/ql/src/qlpack.yml | 2 +- shared/concepts/qlpack.yml | 2 +- shared/controlflow/qlpack.yml | 2 +- shared/dataflow/qlpack.yml | 2 +- shared/mad/qlpack.yml | 2 +- shared/quantum/qlpack.yml | 2 +- shared/rangeanalysis/qlpack.yml | 2 +- shared/regex/qlpack.yml | 2 +- shared/ssa/qlpack.yml | 2 +- shared/threat-models/qlpack.yml | 2 +- shared/tutorial/qlpack.yml | 2 +- shared/typeflow/qlpack.yml | 2 +- shared/typeinference/qlpack.yml | 2 +- shared/typetracking/qlpack.yml | 2 +- shared/typos/qlpack.yml | 2 +- shared/util/qlpack.yml | 2 +- shared/xml/qlpack.yml | 2 +- shared/yaml/qlpack.yml | 2 +- swift/ql/lib/qlpack.yml | 2 +- swift/ql/src/qlpack.yml | 2 +- 41 files changed, 41 insertions(+), 41 deletions(-) diff --git a/actions/ql/lib/qlpack.yml b/actions/ql/lib/qlpack.yml index a20f2e7a507..cf1232c01fb 100644 --- a/actions/ql/lib/qlpack.yml +++ b/actions/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/actions-all -version: 0.4.35 +version: 0.4.36-dev library: true warnOnImplicitThis: true dependencies: diff --git a/actions/ql/src/qlpack.yml b/actions/ql/src/qlpack.yml index bcc7fe06a3b..2c9811c780a 100644 --- a/actions/ql/src/qlpack.yml +++ b/actions/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/actions-queries -version: 0.6.27 +version: 0.6.28-dev library: false warnOnImplicitThis: true groups: [actions, queries] diff --git a/cpp/ql/lib/qlpack.yml b/cpp/ql/lib/qlpack.yml index e3b9f7c3363..88d4dafaebb 100644 --- a/cpp/ql/lib/qlpack.yml +++ b/cpp/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/cpp-all -version: 10.1.0 +version: 10.1.1-dev groups: cpp dbscheme: semmlecode.cpp.dbscheme extractor: cpp diff --git a/cpp/ql/src/qlpack.yml b/cpp/ql/src/qlpack.yml index 83d7a32e6d4..ddd0014f2e0 100644 --- a/cpp/ql/src/qlpack.yml +++ b/cpp/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/cpp-queries -version: 1.6.2 +version: 1.6.3-dev groups: - cpp - queries diff --git a/csharp/ql/campaigns/Solorigate/lib/qlpack.yml b/csharp/ql/campaigns/Solorigate/lib/qlpack.yml index 88ba74212c7..c5cfa0dcd89 100644 --- a/csharp/ql/campaigns/Solorigate/lib/qlpack.yml +++ b/csharp/ql/campaigns/Solorigate/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/csharp-solorigate-all -version: 1.7.66 +version: 1.7.67-dev groups: - csharp - solorigate diff --git a/csharp/ql/campaigns/Solorigate/src/qlpack.yml b/csharp/ql/campaigns/Solorigate/src/qlpack.yml index fee050486c9..c9f0acac983 100644 --- a/csharp/ql/campaigns/Solorigate/src/qlpack.yml +++ b/csharp/ql/campaigns/Solorigate/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/csharp-solorigate-queries -version: 1.7.66 +version: 1.7.67-dev groups: - csharp - solorigate diff --git a/csharp/ql/lib/qlpack.yml b/csharp/ql/lib/qlpack.yml index daded1ee71e..0e03db99ca9 100644 --- a/csharp/ql/lib/qlpack.yml +++ b/csharp/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/csharp-all -version: 6.0.0 +version: 6.0.1-dev groups: csharp dbscheme: semmlecode.csharp.dbscheme extractor: csharp diff --git a/csharp/ql/src/qlpack.yml b/csharp/ql/src/qlpack.yml index 72d951194c3..de201743065 100644 --- a/csharp/ql/src/qlpack.yml +++ b/csharp/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/csharp-queries -version: 1.7.2 +version: 1.7.3-dev groups: - csharp - queries diff --git a/go/ql/consistency-queries/qlpack.yml b/go/ql/consistency-queries/qlpack.yml index 73a837bd0e0..49572c215bb 100644 --- a/go/ql/consistency-queries/qlpack.yml +++ b/go/ql/consistency-queries/qlpack.yml @@ -1,5 +1,5 @@ name: codeql-go-consistency-queries -version: 1.0.49 +version: 1.0.50-dev groups: - go - queries diff --git a/go/ql/lib/qlpack.yml b/go/ql/lib/qlpack.yml index 1d1682d6108..16ddb133517 100644 --- a/go/ql/lib/qlpack.yml +++ b/go/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/go-all -version: 7.1.0 +version: 7.1.1-dev groups: go dbscheme: go.dbscheme extractor: go diff --git a/go/ql/src/qlpack.yml b/go/ql/src/qlpack.yml index 78c75459387..5fdc63fe363 100644 --- a/go/ql/src/qlpack.yml +++ b/go/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/go-queries -version: 1.6.2 +version: 1.6.3-dev groups: - go - queries diff --git a/java/ql/lib/qlpack.yml b/java/ql/lib/qlpack.yml index e57412ee1fc..e5ebb1d2131 100644 --- a/java/ql/lib/qlpack.yml +++ b/java/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/java-all -version: 9.1.0 +version: 9.1.1-dev groups: java dbscheme: config/semmlecode.dbscheme extractor: java diff --git a/java/ql/src/qlpack.yml b/java/ql/src/qlpack.yml index bdaaf77ec9e..3776af914c2 100644 --- a/java/ql/src/qlpack.yml +++ b/java/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/java-queries -version: 1.11.2 +version: 1.11.3-dev groups: - java - queries diff --git a/javascript/ql/lib/qlpack.yml b/javascript/ql/lib/qlpack.yml index 97c3de6a8eb..623a7f4db2f 100644 --- a/javascript/ql/lib/qlpack.yml +++ b/javascript/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/javascript-all -version: 2.7.0 +version: 2.7.1-dev groups: javascript dbscheme: semmlecode.javascript.dbscheme extractor: javascript diff --git a/javascript/ql/src/qlpack.yml b/javascript/ql/src/qlpack.yml index 417df72e5ba..593c0032466 100644 --- a/javascript/ql/src/qlpack.yml +++ b/javascript/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/javascript-queries -version: 2.3.9 +version: 2.3.10-dev groups: - javascript - queries diff --git a/misc/suite-helpers/qlpack.yml b/misc/suite-helpers/qlpack.yml index 52fc453aa32..802112d0fc8 100644 --- a/misc/suite-helpers/qlpack.yml +++ b/misc/suite-helpers/qlpack.yml @@ -1,4 +1,4 @@ name: codeql/suite-helpers -version: 1.0.49 +version: 1.0.50-dev groups: shared warnOnImplicitThis: true diff --git a/python/ql/lib/qlpack.yml b/python/ql/lib/qlpack.yml index 2cd96a3e443..724d270b4e8 100644 --- a/python/ql/lib/qlpack.yml +++ b/python/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/python-all -version: 7.1.0 +version: 7.1.1-dev groups: python dbscheme: semmlecode.python.dbscheme extractor: python diff --git a/python/ql/src/qlpack.yml b/python/ql/src/qlpack.yml index 46e7203a953..0544fddc168 100644 --- a/python/ql/src/qlpack.yml +++ b/python/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/python-queries -version: 1.8.2 +version: 1.8.3-dev groups: - python - queries diff --git a/ruby/ql/lib/qlpack.yml b/ruby/ql/lib/qlpack.yml index 261a9890d44..cfdaf454426 100644 --- a/ruby/ql/lib/qlpack.yml +++ b/ruby/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/ruby-all -version: 5.2.0 +version: 5.2.1-dev groups: ruby extractor: ruby dbscheme: ruby.dbscheme diff --git a/ruby/ql/src/qlpack.yml b/ruby/ql/src/qlpack.yml index 34f5d14c39c..beaf92c0d8a 100644 --- a/ruby/ql/src/qlpack.yml +++ b/ruby/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/ruby-queries -version: 1.6.2 +version: 1.6.3-dev groups: - ruby - queries diff --git a/rust/ql/lib/qlpack.yml b/rust/ql/lib/qlpack.yml index 96b825fd949..47f846eb02b 100644 --- a/rust/ql/lib/qlpack.yml +++ b/rust/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/rust-all -version: 0.2.13 +version: 0.2.14-dev groups: rust extractor: rust dbscheme: rust.dbscheme diff --git a/rust/ql/src/qlpack.yml b/rust/ql/src/qlpack.yml index 3fde632f1ac..ec76e8f1265 100644 --- a/rust/ql/src/qlpack.yml +++ b/rust/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/rust-queries -version: 0.1.34 +version: 0.1.35-dev groups: - rust - queries diff --git a/shared/concepts/qlpack.yml b/shared/concepts/qlpack.yml index 95f898e6a70..6943387bae2 100644 --- a/shared/concepts/qlpack.yml +++ b/shared/concepts/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/concepts -version: 0.0.23 +version: 0.0.24-dev groups: shared library: true dependencies: diff --git a/shared/controlflow/qlpack.yml b/shared/controlflow/qlpack.yml index fa246d14d69..7fbe2fd4ea9 100644 --- a/shared/controlflow/qlpack.yml +++ b/shared/controlflow/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/controlflow -version: 2.0.33 +version: 2.0.34-dev groups: shared library: true dependencies: diff --git a/shared/dataflow/qlpack.yml b/shared/dataflow/qlpack.yml index 700651f8de6..34336f4c322 100644 --- a/shared/dataflow/qlpack.yml +++ b/shared/dataflow/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/dataflow -version: 2.1.5 +version: 2.1.6-dev groups: shared library: true dependencies: diff --git a/shared/mad/qlpack.yml b/shared/mad/qlpack.yml index 472719d4127..5631fbf1a73 100644 --- a/shared/mad/qlpack.yml +++ b/shared/mad/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/mad -version: 1.0.49 +version: 1.0.50-dev groups: shared library: true dependencies: diff --git a/shared/quantum/qlpack.yml b/shared/quantum/qlpack.yml index d29cac4faa1..794f238b6e9 100644 --- a/shared/quantum/qlpack.yml +++ b/shared/quantum/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/quantum -version: 0.0.27 +version: 0.0.28-dev groups: shared library: true dependencies: diff --git a/shared/rangeanalysis/qlpack.yml b/shared/rangeanalysis/qlpack.yml index 4261dfb4991..e43819bf36e 100644 --- a/shared/rangeanalysis/qlpack.yml +++ b/shared/rangeanalysis/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/rangeanalysis -version: 1.0.49 +version: 1.0.50-dev groups: shared library: true dependencies: diff --git a/shared/regex/qlpack.yml b/shared/regex/qlpack.yml index 3d6feafbf39..242382c6a1f 100644 --- a/shared/regex/qlpack.yml +++ b/shared/regex/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/regex -version: 1.0.49 +version: 1.0.50-dev groups: shared library: true dependencies: diff --git a/shared/ssa/qlpack.yml b/shared/ssa/qlpack.yml index 517a79c557c..9e510c43dd0 100644 --- a/shared/ssa/qlpack.yml +++ b/shared/ssa/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/ssa -version: 2.0.25 +version: 2.0.26-dev groups: shared library: true dependencies: diff --git a/shared/threat-models/qlpack.yml b/shared/threat-models/qlpack.yml index 71e6b70a313..14e33e435d1 100644 --- a/shared/threat-models/qlpack.yml +++ b/shared/threat-models/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/threat-models -version: 1.0.49 +version: 1.0.50-dev library: true groups: shared dataExtensions: diff --git a/shared/tutorial/qlpack.yml b/shared/tutorial/qlpack.yml index c1df7d67b85..10b77ca0438 100644 --- a/shared/tutorial/qlpack.yml +++ b/shared/tutorial/qlpack.yml @@ -1,7 +1,7 @@ name: codeql/tutorial description: Library for the CodeQL detective tutorials, helping new users learn to write CodeQL queries. -version: 1.0.49 +version: 1.0.50-dev groups: shared library: true warnOnImplicitThis: true diff --git a/shared/typeflow/qlpack.yml b/shared/typeflow/qlpack.yml index 2f22c6655a5..9301be5228f 100644 --- a/shared/typeflow/qlpack.yml +++ b/shared/typeflow/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/typeflow -version: 1.0.49 +version: 1.0.50-dev groups: shared library: true dependencies: diff --git a/shared/typeinference/qlpack.yml b/shared/typeinference/qlpack.yml index 039107b5ef9..1db3c0e95ac 100644 --- a/shared/typeinference/qlpack.yml +++ b/shared/typeinference/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/typeinference -version: 0.0.30 +version: 0.0.31-dev groups: shared library: true dependencies: diff --git a/shared/typetracking/qlpack.yml b/shared/typetracking/qlpack.yml index 4f8f21fd569..436b35815f0 100644 --- a/shared/typetracking/qlpack.yml +++ b/shared/typetracking/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/typetracking -version: 2.0.33 +version: 2.0.34-dev groups: shared library: true dependencies: diff --git a/shared/typos/qlpack.yml b/shared/typos/qlpack.yml index 441404e19f3..4057cfb5784 100644 --- a/shared/typos/qlpack.yml +++ b/shared/typos/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/typos -version: 1.0.49 +version: 1.0.50-dev groups: shared library: true warnOnImplicitThis: true diff --git a/shared/util/qlpack.yml b/shared/util/qlpack.yml index f3b6b7f3ff8..cff240056a4 100644 --- a/shared/util/qlpack.yml +++ b/shared/util/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/util -version: 2.0.36 +version: 2.0.37-dev groups: shared library: true dependencies: null diff --git a/shared/xml/qlpack.yml b/shared/xml/qlpack.yml index 718c36108f6..748ac73e0f8 100644 --- a/shared/xml/qlpack.yml +++ b/shared/xml/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/xml -version: 1.0.49 +version: 1.0.50-dev groups: shared library: true dependencies: diff --git a/shared/yaml/qlpack.yml b/shared/yaml/qlpack.yml index 4bb6e173ab6..54d582bb9fa 100644 --- a/shared/yaml/qlpack.yml +++ b/shared/yaml/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/yaml -version: 1.0.49 +version: 1.0.50-dev groups: shared library: true warnOnImplicitThis: true diff --git a/swift/ql/lib/qlpack.yml b/swift/ql/lib/qlpack.yml index ff088e209fa..921af77ca70 100644 --- a/swift/ql/lib/qlpack.yml +++ b/swift/ql/lib/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/swift-all -version: 6.5.0 +version: 6.5.1-dev groups: swift extractor: swift dbscheme: swift.dbscheme diff --git a/swift/ql/src/qlpack.yml b/swift/ql/src/qlpack.yml index 26d9123f050..5354c5ee4d5 100644 --- a/swift/ql/src/qlpack.yml +++ b/swift/ql/src/qlpack.yml @@ -1,5 +1,5 @@ name: codeql/swift-queries -version: 1.3.2 +version: 1.3.3-dev groups: - swift - queries From 04f587190e42c852823d2a340854c2d1649a26fd Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:13:08 +0000 Subject: [PATCH 03/11] yeast: AST desugaring framework with proc-macro DSL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit YEAST (YEAST Elaborates Abstract Syntax Trees) is a framework for transforming tree-sitter parse trees before CodeQL extraction. Core components: - shared/yeast/ — Ast, Node, Schema, query matching engine, captures, FreshScope, BuildCtx - shared/yeast-macros/ — proc macros: query!, tree!, trees!, rule! The query language is inspired by tree-sitter queries: (assignment left: (_) @lhs right: (_) @rhs) Templates support embedded Rust ({expr}), splicing ({..expr}), computed literals (#{expr}), and fresh identifiers ($name). The rule! macro combines query and transform: rule!((for pattern: (_) @pat ...) => (call receiver: {val} ...)) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Cargo.lock | 40 +- Cargo.toml | 2 + shared/yeast-macros/Cargo.toml | 12 + shared/yeast-macros/src/lib.rs | 105 ++++ shared/yeast-macros/src/parse.rs | 838 +++++++++++++++++++++++++++++++ shared/yeast/Cargo.toml | 15 + shared/yeast/src/bin/main.rs | 26 + shared/yeast/src/build.rs | 91 ++++ shared/yeast/src/captures.rs | 105 ++++ shared/yeast/src/cursor.rs | 8 + shared/yeast/src/lib.rs | 727 +++++++++++++++++++++++++++ shared/yeast/src/query.rs | 228 +++++++++ shared/yeast/src/range.rs | 21 + shared/yeast/src/schema.rs | 167 ++++++ shared/yeast/src/tree_builder.rs | 43 ++ shared/yeast/src/visitor.rs | 111 ++++ 16 files changed, 2535 insertions(+), 4 deletions(-) create mode 100644 shared/yeast-macros/Cargo.toml create mode 100644 shared/yeast-macros/src/lib.rs create mode 100644 shared/yeast-macros/src/parse.rs create mode 100644 shared/yeast/Cargo.toml create mode 100644 shared/yeast/src/bin/main.rs create mode 100644 shared/yeast/src/build.rs create mode 100644 shared/yeast/src/captures.rs create mode 100644 shared/yeast/src/cursor.rs create mode 100644 shared/yeast/src/lib.rs create mode 100644 shared/yeast/src/query.rs create mode 100644 shared/yeast/src/range.rs create mode 100644 shared/yeast/src/schema.rs create mode 100644 shared/yeast/src/tree_builder.rs create mode 100644 shared/yeast/src/visitor.rs diff --git a/Cargo.lock b/Cargo.lock index b6456c84106..38a3c806fb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -416,6 +416,7 @@ dependencies = [ "tree-sitter", "tree-sitter-json", "tree-sitter-ql", + "yeast", "zstd", ] @@ -2470,7 +2471,6 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "indexmap 2.11.4", "itoa", "memchr", "ryu", @@ -2853,14 +2853,13 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.25.9" +version = "0.24.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccd2a058a86cfece0bf96f7cce1021efef9c8ed0e892ab74639173e5ed7a34fa" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" dependencies = [ "cc", "regex", "regex-syntax", - "serde_json", "streaming-iterator", "tree-sitter-language", ] @@ -2891,6 +2890,16 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-ql" version = "0.23.1" @@ -3367,6 +3376,29 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yeast" +version = "0.1.0" +dependencies = [ + "clap", + "serde", + "serde_json", + "serde_yaml", + "tree-sitter", + "tree-sitter-python", + "tree-sitter-ruby", + "yeast-macros", +] + +[[package]] +name = "yeast-macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "yoke" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index 58a755340b9..1e2be0d9ca5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,8 @@ resolver = "2" members = [ "shared/tree-sitter-extractor", + "shared/yeast", + "shared/yeast-macros", "ruby/extractor", "rust/extractor", "rust/extractor/macros", diff --git a/shared/yeast-macros/Cargo.toml b/shared/yeast-macros/Cargo.toml new file mode 100644 index 00000000000..30c82d03b6e --- /dev/null +++ b/shared/yeast-macros/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "yeast-macros" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0" +quote = "1.0" +syn = "2.0" diff --git a/shared/yeast-macros/src/lib.rs b/shared/yeast-macros/src/lib.rs new file mode 100644 index 00000000000..0c264ee13c8 --- /dev/null +++ b/shared/yeast-macros/src/lib.rs @@ -0,0 +1,105 @@ +use proc_macro::TokenStream; +use proc_macro2::TokenStream as TokenStream2; + +mod parse; + +/// Proc macro for constructing a `QueryNode` from a tree-sitter-inspired pattern. +/// +/// # Syntax +/// +/// ```text +/// (_) - match any named node (skips unnamed tokens) +/// (kind) - match a named node of the given kind +/// ("literal") - match an unnamed token by its text +/// (kind field: (pattern)) - match with named field +/// (kind (pat) (pat)...) - match unnamed children (after all fields) +/// (pattern) @capture - capture the matched node +/// (pattern)* @capture - capture each repeated match +/// (pattern)? - zero or one +/// ``` +#[proc_macro] +pub fn query(input: TokenStream) -> TokenStream { + let input2: TokenStream2 = input.into(); + match parse::parse_query_top(input2) { + Ok(output) => output.into(), + Err(err) => err.to_compile_error().into(), + } +} + +/// Build a single AST node from a template, returning its `Id`. +/// +/// # Template syntax +/// +/// ```text +/// (kind "literal") - leaf with static content +/// (kind #{expr}) - leaf with computed content (expr.to_string()) +/// (kind $fresh) - leaf with auto-generated unique name +/// {expr} - embed a Rust expression returning Id +/// {..expr} - splice an iterable of Id (in child/field position) +/// field: {..expr} - splice into a named field +/// ``` +/// +/// Can be called with an explicit context or using the implicit context +/// from an enclosing `rule!`: +/// +/// ```text +/// tree!(ctx, (kind ...)) // explicit BuildCtx +/// tree!((kind ...)) // implicit context from rule! +/// ``` +#[proc_macro] +pub fn tree(input: TokenStream) -> TokenStream { + let input2: TokenStream2 = input.into(); + match parse::parse_tree_top(input2) { + Ok(output) => output.into(), + Err(err) => err.to_compile_error().into(), + } +} + +/// Build a list of AST nodes from a template, returning `Vec`. +/// +/// Like `tree!` but returns `Vec` and supports multiple top-level +/// elements. All syntax from `tree!` is available. +/// +/// Can be called with an explicit context or using the implicit context +/// from an enclosing `rule!`: +/// +/// ```text +/// trees!(ctx, (node1 ...) (node2 ...)) // explicit BuildCtx +/// trees!((node1 ...) (node2 ...)) // implicit context from rule! +/// ``` +#[proc_macro] +pub fn trees(input: TokenStream) -> TokenStream { + let input2: TokenStream2 = input.into(); + match parse::parse_trees_top(input2) { + Ok(output) => output.into(), + Err(err) => err.to_compile_error().into(), + } +} + +/// Define a desugaring rule with query and transform in one declaration. +/// +/// ```text +/// rule!( +/// (query_pattern field: (_) @name (kind)* @repeated (_)? @optional) +/// => +/// (output_template field: {name} {..repeated}) +/// ) +/// +/// // Shorthand: captures become fields on the output node +/// rule!((query ...) => output_kind) +/// ``` +/// +/// Captures become Rust variables automatically: +/// - `@name` (no quantifier) → `name: Id` +/// - `@name` (after `*`/`+`) → `name: Vec` +/// - `@name` (after `?`) → `name: Option` +/// +/// `tree!` and `trees!` can be used without explicit context inside `{...}`. +#[proc_macro] +pub fn rule(input: TokenStream) -> TokenStream { + let input2: TokenStream2 = input.into(); + match parse::parse_rule_top(input2) { + Ok(output) => output.into(), + Err(err) => err.to_compile_error().into(), + } +} diff --git a/shared/yeast-macros/src/parse.rs b/shared/yeast-macros/src/parse.rs new file mode 100644 index 00000000000..f8554f3178c --- /dev/null +++ b/shared/yeast-macros/src/parse.rs @@ -0,0 +1,838 @@ +use proc_macro2::{Delimiter, Ident, Literal, Span, TokenStream, TokenTree}; +use quote::quote; +use std::iter::Peekable; + +type Tokens = Peekable; +type Result = std::result::Result; + +// --------------------------------------------------------------------------- +// Query parsing +// --------------------------------------------------------------------------- + +/// Top-level entry: parse a single query node from the full input. +pub fn parse_query_top(input: TokenStream) -> Result { + let mut tokens = input.into_iter().peekable(); + let result = parse_query_node(&mut tokens)?; + if let Some(tok) = tokens.next() { + return Err(syn::Error::new_spanned(tok, "unexpected token after query")); + } + Ok(result) +} + +/// Parse a single query node (possibly with a trailing `@capture`). +fn parse_query_node(tokens: &mut Tokens) -> Result { + let base = parse_query_atom(tokens)?; + // Check for trailing @capture + if peek_is_at(tokens) { + tokens.next(); // consume @ + let capture_name = expect_ident(tokens, "expected capture name after @")?; + let name_str = capture_name.to_string(); + Ok(quote! { + yeast::query::QueryNode::Capture { + capture: #name_str, + node: Box::new(#base), + } + }) + } else { + Ok(base) + } +} + +/// Parse a query atom: `(kind fields...)` or `(kind fields... bare_children...)`. +/// Does not handle `@capture` — that's handled by the caller as a postfix. +fn parse_query_atom(tokens: &mut Tokens) -> Result { + match tokens.peek() { + None => Err(syn::Error::new( + Span::call_site(), + "unexpected end of query", + )), + Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => { + let group = expect_group(tokens, Delimiter::Parenthesis)?; + let mut inner = group.stream().into_iter().peekable(); + let result = parse_query_node_inner(&mut inner)?; + if let Some(tok) = inner.next() { + return Err(syn::Error::new_spanned( + tok, + "unexpected token in query node", + )); + } + Ok(result) + } + Some(tok) => Err(syn::Error::new_spanned( + tok.clone(), + "expected `(` in query; use `(_) @name` to capture a wildcard", + )), + } +} + +/// Parse the inside of a parenthesized query node: `kind fields...` or `_` or `"lit"`. +fn parse_query_node_inner(tokens: &mut Tokens) -> Result { + match tokens.peek() { + None => Err(syn::Error::new( + Span::call_site(), + "empty parenthesized group in query", + )), + Some(TokenTree::Ident(id)) if *id == "_" => { + tokens.next(); + Ok(quote! { yeast::query::QueryNode::Any() }) + } + Some(TokenTree::Literal(_)) => { + let lit = expect_literal(tokens)?; + Ok(quote! { yeast::query::QueryNode::UnnamedNode { kind: #lit } }) + } + Some(TokenTree::Ident(_)) => { + let kind = expect_ident(tokens, "expected node kind")?; + let kind_str = kind.to_string(); + let fields = parse_query_fields(tokens)?; + Ok(quote! { + yeast::query::QueryNode::Node { + kind: #kind_str, + children: vec![#(#fields),*], + } + }) + } + Some(tok) => Err(syn::Error::new_spanned( + tok.clone(), + "expected node kind, `_`, or string literal", + )), + } +} + +/// Parse zero or more field specifications and trailing bare patterns. +/// Named fields: `name: pattern` or `name*: (list...)`. +/// Bare patterns (no field name) become implicit `child` field entries. +fn parse_query_fields(tokens: &mut Tokens) -> Result> { + let mut fields = Vec::new(); + while tokens.peek().is_some() { + if peek_is_field(tokens) { + let field_name = expect_ident(tokens, "expected field name")?; + let field_str = field_name.to_string(); + + expect_punct(tokens, ':', "expected `:` after field name")?; + + let child = parse_query_node(tokens)?; + fields.push(quote! { + (#field_str, vec![yeast::query::QueryListElem::SingleNode(#child)]) + }); + } else { + // Bare patterns — collect as implicit `child` field + let elems = parse_query_list(tokens)?; + if !elems.is_empty() { + fields.push(quote! { + ("child", vec![#(#elems),*]) + }); + } + break; + } + } + Ok(fields) +} + +/// Parse a list of query elements (bare children). +/// Each element is a node pattern, possibly followed by `*`, `+`, `?`. +fn parse_query_list(tokens: &mut Tokens) -> Result> { + let mut elems = Vec::new(); + while tokens.peek().is_some() { + // Check for parenthesized group + if peek_is_group(tokens, Delimiter::Parenthesis) { + let group = expect_group(tokens, Delimiter::Parenthesis)?; + let mut inner = group.stream().into_iter().peekable(); + + // Check for repetition after the group + if peek_is_repetition(tokens) { + let rep = expect_repetition(tokens)?; + // Determine if the group is a single node pattern or a list + // of patterns. If it starts with an identifier (node kind) or + // `_`, treat it as a single repeated node. Otherwise, parse + // as a repeated list of sub-patterns. + let is_single_node = matches!(inner.peek(), Some(TokenTree::Ident(_))); + if is_single_node { + let node = parse_query_node_inner(&mut inner)?; + let elem = quote! { + yeast::query::QueryListElem::Repeated { + children: vec![yeast::query::QueryListElem::SingleNode(#node)], + rep: #rep, + } + }; + let elem = maybe_wrap_list_capture(tokens, elem)?; + elems.push(elem); + } else { + let sub_elems = parse_query_list(&mut inner)?; + let elem = quote! { + yeast::query::QueryListElem::Repeated { + children: vec![#(#sub_elems),*], + rep: #rep, + } + }; + let elem = maybe_wrap_list_capture(tokens, elem)?; + elems.push(elem); + } + } else { + // Single parenthesized node, possibly followed by @capture + let node = parse_query_node_inner(&mut inner)?; + let node = maybe_wrap_capture(tokens, node)?; + elems.push(quote! { + yeast::query::QueryListElem::SingleNode(#node) + }); + } + continue; + } + + // Check for string literal (unnamed node) + if peek_is_literal(tokens) { + let lit = expect_literal(tokens)?; + let node = quote! { yeast::query::QueryNode::UnnamedNode { kind: #lit } }; + let elem = maybe_wrap_repetition( + tokens, + quote! { + yeast::query::QueryListElem::SingleNode(#node) + }, + )?; + elems.push(elem); + continue; + } + + // Check for bare _ (wildcard), possibly followed by @capture + if peek_is_underscore(tokens) { + tokens.next(); + let node = quote! { yeast::query::QueryNode::Any() }; + let node = maybe_wrap_capture(tokens, node)?; + let elem = maybe_wrap_repetition( + tokens, + quote! { + yeast::query::QueryListElem::SingleNode(#node) + }, + )?; + elems.push(elem); + continue; + } + + break; + } + Ok(elems) +} + +// --------------------------------------------------------------------------- +// tree! / trees! parsing — direct code generation against BuildCtx +// --------------------------------------------------------------------------- + +const IMPLICIT_CTX: &str = "__yeast_ctx"; + +/// Determine the context identifier: either explicit `ctx,` or the implicit +/// `__yeast_ctx` from an enclosing `rule!`. +fn parse_ctx_or_implicit(tokens: &mut Tokens) -> Ident { + // Check if first token is an ident followed by a comma + let mut lookahead = tokens.clone(); + let is_explicit = matches!(lookahead.next(), Some(TokenTree::Ident(_))) + && matches!(lookahead.next(), Some(TokenTree::Punct(p)) if p.as_char() == ','); + + if is_explicit { + let ctx = expect_ident(tokens, "").unwrap(); + let _ = tokens.next(); // consume comma + ctx + } else { + Ident::new(IMPLICIT_CTX, Span::call_site()) + } +} + +/// Parse `tree!(ctx, (template))` or `tree!((template))` — returns single `Id`. +pub fn parse_tree_top(input: TokenStream) -> Result { + let mut tokens = input.into_iter().peekable(); + let ctx = parse_ctx_or_implicit(&mut tokens); + + let first = parse_direct_node(&mut tokens, &ctx)?; + + if let Some(tok) = tokens.next() { + return Err(syn::Error::new_spanned( + tok, + "unexpected tokens after tree! template; use trees! for multiple nodes", + )); + } + + Ok(quote! { { #first } }) +} + +/// Parse `trees!(ctx, ...)` or `trees!(...)` — returns `Vec`. +pub fn parse_trees_top(input: TokenStream) -> Result { + let mut tokens = input.into_iter().peekable(); + let ctx = parse_ctx_or_implicit(&mut tokens); + let items = parse_direct_list(&mut tokens, &ctx)?; + if let Some(tok) = tokens.next() { + return Err(syn::Error::new_spanned( + tok, + "unexpected token after trees! template", + )); + } + Ok(quote! { + { + let mut __nodes: Vec = Vec::new(); + #(#items)* + __nodes + } + }) +} + +/// Parse a single node template and generate code that returns an `Id`. +/// Handles: `(kind fields... children...)` and `{expr}`. +fn parse_direct_node(tokens: &mut Tokens, ctx: &Ident) -> Result { + match tokens.peek() { + Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Brace => { + let group = expect_group(tokens, Delimiter::Brace)?; + let expr = group.stream(); + Ok(quote! { #expr }) + } + Some(TokenTree::Group(g)) if g.delimiter() == Delimiter::Parenthesis => { + let group = expect_group(tokens, Delimiter::Parenthesis)?; + let mut inner = group.stream().into_iter().peekable(); + parse_direct_node_inner(&mut inner, ctx) + } + Some(tok) => Err(syn::Error::new_spanned( + tok.clone(), + "expected `(` or `{` in tree template", + )), + None => Err(syn::Error::new( + Span::call_site(), + "unexpected end of tree template", + )), + } +} + +/// Parse the inside of a parenthesized node: `kind fields... children...` +/// or `kind "literal"` or `kind $fresh`. +fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result { + let kind = expect_ident(tokens, "expected node kind")?; + let kind_str = kind.to_string(); + + // Check for (kind "literal") + if peek_is_literal(tokens) { + let lit = expect_literal(tokens)?; + return Ok(quote! { #ctx.literal(#kind_str, #lit) }); + } + + // Check for (kind #{expr}) — computed literal, expr converted via .to_string() + if peek_is_hash(tokens) { + tokens.next(); // consume # + let group = expect_group(tokens, Delimiter::Brace)?; + let expr = group.stream(); + return Ok(quote! { #ctx.literal(#kind_str, &(#expr).to_string()) }); + } + + // Check for (kind $fresh) + if peek_is_dollar(tokens) { + tokens.next(); + let name = expect_ident(tokens, "expected fresh variable name after $")?; + let name_str = name.to_string(); + return Ok(quote! { #ctx.fresh(#kind_str, #name_str) }); + } + + // Parse named fields + let mut stmts = Vec::new(); + let mut field_args = Vec::new(); + let mut field_counter = 0usize; + + // Named fields — compute each value into a temp, then reference it + while peek_is_field(tokens) { + let field_name = expect_ident(tokens, "expected field name")?; + let field_str = field_name.to_string(); + expect_punct(tokens, ':', "expected `:` after field name")?; + let temp = Ident::new( + &format!("__field_{field_str}_{field_counter}"), + Span::call_site(), + ); + field_counter += 1; + + // Check for field: {..expr} — splice a Vec into the field + if peek_is_group(tokens, Delimiter::Brace) { + let group_clone = tokens.clone().next().unwrap(); + if let TokenTree::Group(g) = &group_clone { + let mut inner_check = g.stream().into_iter(); + let is_splice = matches!(inner_check.next(), Some(TokenTree::Punct(p)) if p.as_char() == '.') + && matches!(inner_check.next(), Some(TokenTree::Punct(p)) if p.as_char() == '.'); + if is_splice { + let group = expect_group(tokens, Delimiter::Brace)?; + let mut inner = group.stream().into_iter().peekable(); + inner.next(); // consume first . + inner.next(); // consume second . + let expr: proc_macro2::TokenStream = inner.collect(); + stmts.push(quote! { let #temp: Vec = #expr; }); + field_args.push(quote! { (#field_str, #temp) }); + continue; + } + } + } + + let value = parse_direct_node(tokens, ctx)?; + stmts.push(quote! { let #temp = #value; }); + field_args.push(quote! { (#field_str, vec![#temp]) }); + } + + // After all named fields, no other tokens are allowed. + // Output templates require all children to be in named fields. + if let Some(tok) = tokens.peek() { + return Err(syn::Error::new_spanned( + tok.clone(), + "expected named field (`name:`) or end of node template; \ + output templates do not support unnamed children", + )); + } + + Ok(quote! { + { + #(#stmts)* + #ctx.node(#kind_str, vec![#(#field_args),*]) + } + }) +} + +/// Parse the top-level list of a `trees!` template. +/// Each item is a node template or `{expr}` splice. +fn parse_direct_list(tokens: &mut Tokens, ctx: &Ident) -> Result> { + let mut items = Vec::new(); + while tokens.peek().is_some() { + if peek_is_group(tokens, Delimiter::Parenthesis) { + let group = expect_group(tokens, Delimiter::Parenthesis)?; + let mut inner = group.stream().into_iter().peekable(); + + // Regular node + let node = parse_direct_node_inner(&mut inner, ctx)?; + items.push(quote! { __nodes.push(#node); }); + continue; + } + + // {expr} or {..expr} — single node or splice + if peek_is_group(tokens, Delimiter::Brace) { + let group = expect_group(tokens, Delimiter::Brace)?; + let mut inner = group.stream().into_iter().peekable(); + if peek_is_dotdot(&inner) { + inner.next(); // consume first . + inner.next(); // consume second . + let expr: TokenStream = inner.collect(); + items.push(quote! { __nodes.extend(#expr); }); + } else { + let expr = group.stream(); + items.push(quote! { __nodes.push(#expr); }); + } + continue; + } + + break; + } + Ok(items) +} + +// --------------------------------------------------------------------------- +// rule! parsing +// --------------------------------------------------------------------------- + +/// A captured variable from a query pattern. +struct CaptureInfo { + name: String, + multiplicity: CaptureMultiplicity, +} + +#[derive(Clone, Copy, PartialEq)] +enum CaptureMultiplicity { + /// Exactly one match (bare pattern or after no quantifier) + Single, + /// Zero or one match (after `?`) + Optional, + /// Zero or more matches (after `*` or `+`, or inside a repeated group) + Repeated, +} + +/// Walk a token stream and extract all `@name` captures, noting whether +/// they appear after `*` or `+` (repeated) or not. +fn extract_captures(stream: &TokenStream) -> Vec { + let mut captures = Vec::new(); + extract_captures_inner( + &mut stream.clone().into_iter().peekable(), + &mut captures, + CaptureMultiplicity::Single, + ); + captures +} + +fn extract_captures_inner( + tokens: &mut Tokens, + captures: &mut Vec, + parent_mult: CaptureMultiplicity, +) { + let mut last_mult = CaptureMultiplicity::Single; + while let Some(tok) = tokens.next() { + match tok { + TokenTree::Group(g) => { + let mut inner = g.stream().into_iter().peekable(); + let group_mult = match tokens.peek() { + Some(TokenTree::Punct(p)) if p.as_char() == '*' || p.as_char() == '+' => { + CaptureMultiplicity::Repeated + } + Some(TokenTree::Punct(p)) if p.as_char() == '?' => { + CaptureMultiplicity::Optional + } + _ => CaptureMultiplicity::Single, + }; + last_mult = group_mult; + let child_mult = if parent_mult == CaptureMultiplicity::Repeated + || group_mult == CaptureMultiplicity::Repeated + { + CaptureMultiplicity::Repeated + } else if parent_mult == CaptureMultiplicity::Optional + || group_mult == CaptureMultiplicity::Optional + { + CaptureMultiplicity::Optional + } else { + CaptureMultiplicity::Single + }; + extract_captures_inner(&mut inner, captures, child_mult); + } + TokenTree::Punct(p) if p.as_char() == '@' => { + if let Some(TokenTree::Ident(name)) = tokens.next() { + let mult = if parent_mult == CaptureMultiplicity::Repeated + || last_mult == CaptureMultiplicity::Repeated + { + CaptureMultiplicity::Repeated + } else if parent_mult == CaptureMultiplicity::Optional + || last_mult == CaptureMultiplicity::Optional + { + CaptureMultiplicity::Optional + } else { + CaptureMultiplicity::Single + }; + captures.push(CaptureInfo { + name: name.to_string(), + multiplicity: mult, + }); + } + last_mult = CaptureMultiplicity::Single; + } + TokenTree::Punct(p) if matches!(p.as_char(), '*' | '+' | '?') => { + // Keep last_mult — the @capture follows + } + _ => { + last_mult = CaptureMultiplicity::Single; + } + } + } +} + +/// Parse `rule!( query => transform )`. +pub fn parse_rule_top(input: TokenStream) -> Result { + let mut tokens = input.into_iter().peekable(); + + // Collect query tokens up to `=>` + let mut query_tokens = Vec::new(); + loop { + match tokens.peek() { + None => return Err(syn::Error::new(Span::call_site(), "expected `=>` in rule!")), + Some(TokenTree::Punct(p)) if p.as_char() == '=' => { + let eq = tokens.next().unwrap(); + match tokens.peek() { + Some(TokenTree::Punct(p)) if p.as_char() == '>' => { + tokens.next(); // consume > + break; + } + _ => { + query_tokens.push(eq); + continue; + } + } + } + _ => { + query_tokens.push(tokens.next().unwrap()); + } + } + } + + let query_stream: TokenStream = query_tokens.into_iter().collect(); + + // Extract captures from query + let captures = extract_captures(&query_stream); + + // Parse query + let query_code = parse_query_top(query_stream.clone())?; + + // Generate capture bindings + let ctx_ident = Ident::new(IMPLICIT_CTX, Span::call_site()); + let bindings: Vec = captures + .iter() + .map(|cap| { + let name = Ident::new(&cap.name, Span::call_site()); + let name_str = &cap.name; + match cap.multiplicity { + CaptureMultiplicity::Repeated => { + quote! { let #name: Vec = __captures.get_all(#name_str); } + } + CaptureMultiplicity::Optional => { + quote! { let #name: Option = __captures.get_opt(#name_str); } + } + CaptureMultiplicity::Single => { + quote! { let #name: usize = __captures.get_var(#name_str).unwrap(); } + } + } + }) + .collect(); + + // Parse transform: either shorthand `=> kind_name` or full `=> (template ...)` + let transform_body = if peek_is_field(&mut tokens) && { + // Shorthand form: bare identifier = output node kind. + // Auto-generate template from captures. + let mut lookahead = tokens.clone(); + lookahead.next(); // skip ident + lookahead.peek().is_none() // nothing after = shorthand + } { + let output_kind = expect_ident(&mut tokens, "expected output node kind")?; + let output_kind_str = output_kind.to_string(); + + // Generate field assignments from captures + let field_stmts: Vec = captures + .iter() + .map(|cap| { + let name = Ident::new(&cap.name, Span::call_site()); + let name_str = &cap.name; + match cap.multiplicity { + CaptureMultiplicity::Repeated => quote! { + let __field_id = #ctx_ident.ast.field_id_for_name(#name_str) + .unwrap_or_else(|| panic!("field '{}' not found", #name_str)); + __fields.insert(__field_id, #name); + }, + CaptureMultiplicity::Optional => quote! { + let __field_id = #ctx_ident.ast.field_id_for_name(#name_str) + .unwrap_or_else(|| panic!("field '{}' not found", #name_str)); + if let Some(__id) = #name { + __fields.entry(__field_id).or_insert_with(Vec::new).push(__id); + } + }, + CaptureMultiplicity::Single => quote! { + let __field_id = #ctx_ident.ast.field_id_for_name(#name_str) + .unwrap_or_else(|| panic!("field '{}' not found", #name_str)); + __fields.entry(__field_id).or_insert_with(Vec::new).push(#name); + }, + } + }) + .collect(); + + quote! { + let __kind = #ctx_ident.ast.id_for_node_kind(#output_kind_str) + .unwrap_or_else(|| panic!("node kind '{}' not found", #output_kind_str)); + let mut __fields = std::collections::BTreeMap::new(); + #(#field_stmts)* + let __id = #ctx_ident.ast.create_node_with_range( + __kind, + yeast::NodeContent::DynamicString(String::new()), + __fields, + true, + __source_range, + ); + vec![__id] + } + } else { + // Full template form + let transform_items = parse_direct_list(&mut tokens, &ctx_ident)?; + + if let Some(tok) = tokens.next() { + return Err(syn::Error::new_spanned( + tok, + "unexpected token after rule! transform", + )); + } + + quote! { + let mut __nodes: Vec = Vec::new(); + #(#transform_items)* + __nodes + } + }; + + Ok(quote! { + { + let __query = #query_code; + yeast::Rule::new(__query, Box::new(|__ast: &mut yeast::Ast, __captures: yeast::captures::Captures, __fresh: &yeast::tree_builder::FreshScope, __source_range: Option| { + #(#bindings)* + let mut #ctx_ident = yeast::build::BuildCtx::with_source_range(__ast, &__captures, __fresh, __source_range); + #transform_body + })) + } + }) +} + +// --------------------------------------------------------------------------- +// Token utilities +// --------------------------------------------------------------------------- + +fn peek_is_at(tokens: &mut Tokens) -> bool { + matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '@') +} + +fn peek_is_literal(tokens: &mut Tokens) -> bool { + matches!(tokens.peek(), Some(TokenTree::Literal(_))) +} + +fn peek_is_dollar(tokens: &mut Tokens) -> bool { + matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '$') +} + +fn peek_is_hash(tokens: &mut Tokens) -> bool { + matches!(tokens.peek(), Some(TokenTree::Punct(p)) if p.as_char() == '#') +} + +/// Check for `..` (two consecutive dot punctuation tokens). +fn peek_is_dotdot(tokens: &Tokens) -> bool { + let mut lookahead = tokens.clone(); + matches!(lookahead.next(), Some(TokenTree::Punct(p)) if p.as_char() == '.') + && matches!(lookahead.next(), Some(TokenTree::Punct(p)) if p.as_char() == '.') +} + +fn peek_is_underscore(tokens: &mut Tokens) -> bool { + matches!(tokens.peek(), Some(TokenTree::Ident(id)) if *id == "_") +} + +/// Check if the next tokens form a field specification (ident followed by `:` or `*:`). +/// A bare identifier (other than `_`) at this position is always a field name, since +/// bare child patterns must start with `(`, `@`, `"literal"`, or `_`. +fn peek_is_field(tokens: &mut Tokens) -> bool { + matches!(tokens.peek(), Some(TokenTree::Ident(id)) if *id != "_") +} + +fn peek_is_group(tokens: &mut Tokens, delim: Delimiter) -> bool { + matches!(tokens.peek(), Some(TokenTree::Group(g)) if g.delimiter() == delim) +} + +fn peek_is_repetition(tokens: &mut Tokens) -> bool { + matches!(tokens.peek(), Some(TokenTree::Punct(p)) if matches!(p.as_char(), '*' | '+' | '?')) +} + +fn expect_ident(tokens: &mut Tokens, msg: &str) -> Result { + match tokens.next() { + Some(TokenTree::Ident(id)) => Ok(id), + Some(tok) => Err(syn::Error::new_spanned(tok, msg)), + None => Err(syn::Error::new(Span::call_site(), msg)), + } +} + +fn expect_literal(tokens: &mut Tokens) -> Result { + match tokens.next() { + Some(TokenTree::Literal(lit)) => Ok(lit), + Some(tok) => Err(syn::Error::new_spanned(tok, "expected string literal")), + None => Err(syn::Error::new( + Span::call_site(), + "expected string literal", + )), + } +} + +fn expect_punct(tokens: &mut Tokens, ch: char, msg: &str) -> Result<()> { + match tokens.next() { + Some(TokenTree::Punct(p)) if p.as_char() == ch => Ok(()), + Some(tok) => Err(syn::Error::new_spanned(tok, msg)), + None => Err(syn::Error::new(Span::call_site(), msg)), + } +} + +fn expect_group(tokens: &mut Tokens, delim: Delimiter) -> Result { + match tokens.next() { + Some(TokenTree::Group(g)) if g.delimiter() == delim => Ok(g), + Some(tok) => Err(syn::Error::new_spanned( + tok, + format!("expected {delim:?} group"), + )), + None => Err(syn::Error::new( + Span::call_site(), + format!("expected {delim:?} group"), + )), + } +} + +fn expect_repetition(tokens: &mut Tokens) -> Result { + match tokens.next() { + Some(TokenTree::Punct(p)) => match p.as_char() { + '*' => Ok(quote! { yeast::query::Rep::ZeroOrMore }), + '+' => Ok(quote! { yeast::query::Rep::OneOrMore }), + '?' => Ok(quote! { yeast::query::Rep::ZeroOrOne }), + _ => Err(syn::Error::new(p.span(), "expected `*`, `+`, or `?`")), + }, + Some(tok) => Err(syn::Error::new_spanned( + tok, + "expected repetition quantifier", + )), + None => Err(syn::Error::new( + Span::call_site(), + "expected repetition quantifier", + )), + } +} + +fn maybe_wrap_capture(tokens: &mut Tokens, base: TokenStream) -> Result { + if peek_is_at(tokens) { + tokens.next(); // consume @ + let name = expect_ident(tokens, "expected capture name after @")?; + let name_str = name.to_string(); + Ok(quote! { + yeast::query::QueryNode::Capture { + capture: #name_str, + node: Box::new(#base), + } + }) + } else { + Ok(base) + } +} + +fn maybe_wrap_repetition(tokens: &mut Tokens, single: TokenStream) -> Result { + if peek_is_repetition(tokens) { + let rep = expect_repetition(tokens)?; + Ok(quote! { + yeast::query::QueryListElem::Repeated { + children: vec![#single], + rep: #rep, + } + }) + } else { + Ok(single) + } +} + +/// If `@name` follows a Repeated list element, wrap each child SingleNode +/// inside the repetition with a Capture. This matches tree-sitter semantics +/// where `(_)* @name` captures each matched node. +fn maybe_wrap_list_capture(tokens: &mut Tokens, elem: TokenStream) -> Result { + if peek_is_at(tokens) { + tokens.next(); + let name = expect_ident(tokens, "expected capture name after @")?; + let name_str = name.to_string(); + // Re-parse the element isn't practical, so we generate a wrapper + // that creates a new Repeated with each child wrapped in a capture. + // The simplest approach: generate code that the runtime can interpret. + // Actually, the capture annotation on repeated elements is best handled + // by re-generating the Repeated with captures injected. + // For now, assume the common case: the repetition contains a single + // SingleNode child, and we wrap that node in a capture. + Ok(quote! { + { + let __rep = #elem; + match __rep { + yeast::query::QueryListElem::Repeated { children, rep } => { + yeast::query::QueryListElem::Repeated { + children: children.into_iter().map(|child| { + match child { + yeast::query::QueryListElem::SingleNode(node) => { + yeast::query::QueryListElem::SingleNode( + yeast::query::QueryNode::Capture { + capture: #name_str, + node: Box::new(node), + } + ) + } + other => other, + } + }).collect(), + rep, + } + } + other => other, + } + } + }) + } else { + Ok(elem) + } +} diff --git a/shared/yeast/Cargo.toml b/shared/yeast/Cargo.toml new file mode 100644 index 00000000000..166887c324c --- /dev/null +++ b/shared/yeast/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "yeast" +version = "0.1.0" +edition = "2021" + +[dependencies] +clap = { version = "4.4.10", features = ["derive"] } +serde = { version = "1.0.193", features = ["derive"] } +serde_json = "1.0.108" +serde_yaml = "0.9" +tree-sitter = ">= 0.23.0" +yeast-macros = { path = "../yeast-macros" } + +tree-sitter-ruby = "0.23" +tree-sitter-python = "0.23" diff --git a/shared/yeast/src/bin/main.rs b/shared/yeast/src/bin/main.rs new file mode 100644 index 00000000000..975c8e8b25f --- /dev/null +++ b/shared/yeast/src/bin/main.rs @@ -0,0 +1,26 @@ +use clap::Parser; + +#[derive(Parser)] +#[clap(name = "yeast", about = "yeast elaborates abstract syntax trees")] +struct Cli { + file: String, + #[clap(default_value = "ruby")] + language: String, +} + +fn get_language(language: &str) -> tree_sitter::Language { + match language { + "ruby" => tree_sitter_ruby::LANGUAGE.into(), + "python" => tree_sitter_python::LANGUAGE.into(), + _ => panic!("Unsupported language: {language}"), + } +} + +fn main() { + let args = Cli::parse(); + let language = get_language(&args.language); + let source = std::fs::read_to_string(&args.file).unwrap(); + let runner = yeast::Runner::new(language, &[]); + let ast = runner.run(&source).unwrap(); + println!("{}", ast.print(&source, ast.get_root())); +} diff --git a/shared/yeast/src/build.rs b/shared/yeast/src/build.rs new file mode 100644 index 00000000000..bee4c4f7d03 --- /dev/null +++ b/shared/yeast/src/build.rs @@ -0,0 +1,91 @@ +use std::collections::BTreeMap; + +use crate::captures::Captures; +use crate::tree_builder::FreshScope; +use crate::{Ast, FieldId, Id, NodeContent}; + +/// Context for building new AST nodes during a transformation. +/// +/// Used by the `tree!` and `trees!` macros. Holds a mutable reference to the +/// AST, a reference to the captures from a query match, and a `FreshScope` for +/// generating unique identifiers. +pub struct BuildCtx<'a> { + pub ast: &'a mut Ast, + pub captures: &'a Captures, + pub fresh: &'a FreshScope, + /// Source range of the matched node, inherited by synthetic nodes. + pub source_range: Option, +} + +impl<'a> BuildCtx<'a> { + pub fn new(ast: &'a mut Ast, captures: &'a Captures, fresh: &'a FreshScope) -> Self { + Self { + ast, + captures, + fresh, + source_range: None, + } + } + + pub fn with_source_range( + ast: &'a mut Ast, + captures: &'a Captures, + fresh: &'a FreshScope, + source_range: Option, + ) -> Self { + Self { + ast, + captures, + fresh, + source_range, + } + } + + /// Look up a capture variable, returning its node Id. + pub fn capture(&self, name: &str) -> Id { + self.captures + .get_var(name) + .unwrap_or_else(|e| panic!("build: {e}")) + } + + /// Get all values of a repeated capture variable. + pub fn capture_all(&self, name: &str) -> Vec { + self.captures.get_all(name) + } + + /// Create a named AST node with the given kind and fields. + pub fn node(&mut self, kind: &str, fields: Vec<(&str, Vec)>) -> Id { + let kind_id = self + .ast + .id_for_node_kind(kind) + .unwrap_or_else(|| panic!("build: node kind '{kind}' not found")); + let mut field_map: BTreeMap> = BTreeMap::new(); + for (name, ids) in fields { + let field_id = self + .ast + .field_id_for_name(name) + .unwrap_or_else(|| panic!("build: field '{name}' not found")); + field_map.entry(field_id).or_default().extend(ids); + } + self.ast.create_node_with_range( + kind_id, + NodeContent::DynamicString(String::new()), + field_map, + true, + self.source_range, + ) + } + + /// Create a leaf node with a fixed string content. + pub fn literal(&mut self, kind: &'static str, value: &str) -> Id { + self.ast + .create_named_token_with_range(kind, value.to_string(), self.source_range) + } + + /// Create a leaf node with an auto-generated unique name. + pub fn fresh(&mut self, kind: &'static str, name: &str) -> Id { + let generated = self.fresh.resolve(name); + self.ast + .create_named_token_with_range(kind, generated, self.source_range) + } +} diff --git a/shared/yeast/src/captures.rs b/shared/yeast/src/captures.rs new file mode 100644 index 00000000000..a92c5096e94 --- /dev/null +++ b/shared/yeast/src/captures.rs @@ -0,0 +1,105 @@ +use std::collections::{BTreeMap, BTreeSet}; + +use crate::Id; + +#[derive(Debug, Clone)] +pub struct Captures { + captures: BTreeMap<&'static str, Vec>, +} + +impl Default for Captures { + fn default() -> Self { + Self::new() + } +} + +impl Captures { + pub fn new() -> Self { + Captures { + captures: BTreeMap::new(), + } + } + + pub fn get_var(&self, key: &str) -> Result { + let ids = self.captures.get(key); + if let Some(ids) = ids { + if ids.len() == 1 { + Ok(ids[0]) + } else { + Err(format!( + "Variable {} has {} matches, use * to allow repetition", + key, + ids.len() + )) + } + } else { + Err(format!("No variable named {key}")) + } + } + + /// Get all values of a capture variable (for repeated captures). + pub fn get_all(&self, key: &str) -> Vec { + self.captures.get(key).cloned().unwrap_or_default() + } + + /// Get an optional capture variable. Returns None if unmatched, + /// Some(id) if matched exactly once. + pub fn get_opt(&self, key: &str) -> Option { + self.captures + .get(key) + .and_then(|ids| if ids.len() == 1 { Some(ids[0]) } else { None }) + } + + pub fn insert(&mut self, key: &'static str, id: Id) { + self.captures.entry(key).or_default().push(id); + } + + pub fn map_captures(&mut self, kind: &str, f: &mut impl FnMut(Id) -> Id) { + if let Some(ids) = self.captures.get_mut(kind) { + for id in ids { + *id = f(*id); + } + } + } + pub fn map_captures_to(&mut self, from: &str, to: &'static str, f: &mut impl FnMut(Id) -> Id) { + if let Some(from_ids) = self.captures.get(from) { + let new_values = from_ids.iter().copied().map(f).collect(); + self.captures.insert(to, new_values); + } + } + + pub fn merge(&mut self, other: &Captures) { + for (key, ids) in &other.captures { + self.captures.entry(key).or_default().extend(ids); + } + } + + pub fn un_star<'a>( + &'a self, + children: &'a BTreeSet<&'static str>, + ) -> Result + 'a, String> { + let mut id_iter = children.iter(); + + if let Some(fst) = id_iter.next() { + let repeats = self + .captures + .get(fst) + .ok_or_else(|| format!("No variable named {fst}"))? + .len(); + // TODO: better error on missing capture + if id_iter.any(|id| self.captures.get(id).map(Vec::len).unwrap_or(0) != repeats) { + return Err("Repeated captures must have the same number of matches".to_string()); + } + Ok((0..repeats).map(move |iter| { + let mut new_vars: Captures = Captures::new(); + for id in children { + let child_capture = self.captures.get(id).unwrap()[iter]; + new_vars.captures.insert(id, vec![child_capture]); + } + new_vars + })) + } else { + Err("Repeated captures must have at least one capture".to_string()) + } + } +} diff --git a/shared/yeast/src/cursor.rs b/shared/yeast/src/cursor.rs new file mode 100644 index 00000000000..ef5f6d94f25 --- /dev/null +++ b/shared/yeast/src/cursor.rs @@ -0,0 +1,8 @@ +pub trait Cursor<'a, T, N, F> { + fn node(&self) -> &'a N; + fn field_id(&self) -> Option; + fn field_name(&self) -> Option<&'static str>; + fn goto_first_child(&mut self) -> bool; + fn goto_next_sibling(&mut self) -> bool; + fn goto_parent(&mut self) -> bool; +} diff --git a/shared/yeast/src/lib.rs b/shared/yeast/src/lib.rs new file mode 100644 index 00000000000..46629e19840 --- /dev/null +++ b/shared/yeast/src/lib.rs @@ -0,0 +1,727 @@ +use std::collections::BTreeMap; + +extern crate self as yeast; + +use serde::Serialize; +use serde_json::{json, Value}; + +pub mod build; +pub mod captures; +pub mod cursor; +pub mod dump; +pub mod node_types_yaml; +pub mod query; +mod range; +pub mod schema; +pub mod tree_builder; +mod visitor; + +pub use yeast_macros::{query, rule, tree, trees}; + +use captures::Captures; +pub use cursor::Cursor; +use query::QueryNode; + +/// Node ids are indexes into the arena +type Id = usize; + +/// Field and Kind ids are provided by tree-sitter +type FieldId = u16; +type KindId = u16; + +pub const CHILD_FIELD: u16 = u16::MAX; + +#[derive(Debug)] +pub struct AstCursor<'a> { + ast: &'a Ast, + /// A stack of parents, along with iterators for their children + parents: Vec<(&'a Node, ChildrenIter<'a>)>, + node: &'a Node, +} + +impl<'a> AstCursor<'a> { + pub fn new(ast: &'a Ast) -> Self { + // TODO: handle non-zero root + let node = ast.get_node(ast.root).unwrap(); + Self { + ast, + parents: vec![], + node, + } + } + + fn goto_next_sibling_opt(&mut self) -> Option<()> { + self.node = self.parents.last_mut()?.1.next()?; + Some(()) + } + + fn goto_first_child_opt(&mut self) -> Option<()> { + let parent = self.node; + let mut children = ChildrenIter::new(self.ast, parent); + let first_child = children.next()?; + self.node = first_child; + self.parents.push((parent, children)); + Some(()) + } + + fn goto_parent_opt(&mut self) -> Option<()> { + self.node = self.parents.pop()?.0; + Some(()) + } +} +impl<'a> Cursor<'a, Ast, Node, FieldId> for AstCursor<'a> { + fn node(&self) -> &'a Node { + self.node + } + + fn field_id(&self) -> Option { + let (_, children) = self.parents.last()?; + children.current_field() + } + + fn field_name(&self) -> Option<&'static str> { + if self.field_id() == Some(CHILD_FIELD) { + None + } else { + self.field_id() + .and_then(|id| self.ast.field_name_for_id(id)) + } + } + + fn goto_first_child(&mut self) -> bool { + self.goto_first_child_opt().is_some() + } + + fn goto_next_sibling(&mut self) -> bool { + self.goto_next_sibling_opt().is_some() + } + + fn goto_parent(&mut self) -> bool { + self.goto_parent_opt().is_some() + } +} + +/// An iterator over all the child nodes of a node. +#[derive(Debug)] +struct ChildrenIter<'a> { + ast: &'a Ast, + current_field: Option, + fields: std::collections::btree_map::Iter<'a, FieldId, Vec>, + field_children: Option>, +} + +impl<'a> ChildrenIter<'a> { + fn new(ast: &'a Ast, node: &'a Node) -> Self { + Self { + ast, + current_field: None, + fields: node.fields.iter(), + field_children: None, + } + } + + fn get_node(&self, id: Id) -> &'a Node { + self.ast.get_node(id).unwrap() + } + + fn current_field(&self) -> Option { + self.current_field + } +} + +impl<'a> Iterator for ChildrenIter<'a> { + type Item = &'a Node; + + fn next(&mut self) -> Option { + match self.field_children.as_mut() { + None => match self.fields.next() { + Some((field, children)) => { + self.current_field = Some(*field); + self.field_children = Some(children.iter()); + self.next() + } + None => None, + }, + Some(children) => match children.next() { + None => match self.fields.next() { + None => None, + Some((field, children)) => { + self.current_field = Some(*field); + self.field_children = Some(children.iter()); + self.next() + } + }, + Some(child_id) => Some(self.get_node(*child_id)), + }, + } + } +} + +/// Our AST +pub struct Ast { + root: Id, + nodes: Vec, + schema: schema::Schema, +} + +impl std::fmt::Debug for Ast { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Ast") + .field("root", &self.root) + .field("nodes", &self.nodes.len()) + .finish() + } +} + +impl Ast { + /// Construct an AST from a TS tree + pub fn from_tree(language: tree_sitter::Language, tree: &tree_sitter::Tree) -> Self { + let schema = schema::Schema::from_language(&language); + Self::from_tree_with_schema(schema, tree, &language) + } + + pub fn from_tree_with_schema( + schema: schema::Schema, + tree: &tree_sitter::Tree, + language: &tree_sitter::Language, + ) -> Self { + let mut visitor = visitor::Visitor::new(language.clone()); + visitor.visit(tree); + + visitor.build_with_schema(schema) + } + + pub fn walk(&self) -> AstCursor { + AstCursor::new(self) + } + + pub fn nodes(&self) -> &[Node] { + &self.nodes + } + + pub fn get_root(&self) -> Id { + self.root + } + + pub fn set_root(&mut self, root: Id) { + self.root = root; + } + + pub fn get_node(&self, id: Id) -> Option<&Node> { + self.nodes.get(id) + } + + pub fn print(&self, source: &str, root_id: Id) -> Value { + let root = &self.nodes()[root_id]; + self.print_node(root, source) + } + + pub fn create_node( + &mut self, + kind: KindId, + content: NodeContent, + fields: BTreeMap>, + is_named: bool, + ) -> Id { + self.create_node_with_range(kind, content, fields, is_named, None) + } + + pub fn create_node_with_range( + &mut self, + kind: KindId, + content: NodeContent, + fields: BTreeMap>, + is_named: bool, + source_range: Option, + ) -> Id { + let id = self.nodes.len(); + self.nodes.push(Node { + id, + kind, + kind_name: self.schema.node_kind_for_id(kind).unwrap(), + fields, + content, + is_missing: false, + is_error: false, + is_extra: false, + is_named, + source_range, + }); + id + } + + pub fn create_named_token(&mut self, kind: &'static str, content: String) -> Id { + self.create_named_token_with_range(kind, content, None) + } + + pub fn create_named_token_with_range( + &mut self, + kind: &'static str, + content: String, + source_range: Option, + ) -> Id { + let kind_id = self.schema.id_for_node_kind(kind).unwrap_or_else(|| { + panic!("create_named_token: node kind '{kind}' not found in schema") + }); + let id = self.nodes.len(); + self.nodes.push(Node { + id, + kind: kind_id, + kind_name: kind, + is_named: true, + is_missing: false, + is_error: false, + source_range, + is_extra: false, + fields: BTreeMap::new(), + content: NodeContent::DynamicString(content), + }); + id + } + + pub fn field_name_for_id(&self, id: FieldId) -> Option<&'static str> { + self.schema.field_name_for_id(id) + } + + pub fn field_id_for_name(&self, name: &str) -> Option { + self.schema.field_id_for_name(name) + } + + /// Print a node for debugging + fn print_node(&self, node: &Node, source: &str) -> Value { + let fields: BTreeMap<&'static str, Vec> = node + .fields + .iter() + .map(|(field_id, nodes)| { + let field_name = if field_id == &CHILD_FIELD { + "rest" + } else { + self.field_name_for_id(*field_id).unwrap() + }; + let nodes: Vec = nodes + .iter() + .map(|id| self.print_node(self.get_node(*id).unwrap(), source)) + .collect(); + (field_name, nodes) + }) + .collect(); + let mut value = BTreeMap::new(); + let kind = self.schema.node_kind_for_id(node.kind).unwrap(); + let content = match &node.content { + NodeContent::Range(range) => source[range.start_byte..range.end_byte].to_string(), + NodeContent::String(s) => s.to_string(), + NodeContent::DynamicString(s) => s.clone(), + }; + if fields.is_empty() { + value.insert(kind, json!(content)); + } else { + let mut fields: BTreeMap<_, _> = + fields.into_iter().map(|(k, v)| (k, json!(v))).collect(); + fields.insert("content", json!(content)); + value.insert(kind, json!(fields)); + } + json!(value) + } + + pub fn id_for_node_kind(&self, kind: &str) -> Option { + let id = self.schema.id_for_node_kind(kind).unwrap_or(0); + if id == 0 { + None + } else { + Some(id) + } + } + + fn id_for_unnamed_node_kind(&self, kind: &str) -> Option { + let id = self.schema.id_for_unnamed_node_kind(kind).unwrap_or(0); + if id == 0 { + None + } else { + Some(id) + } + } +} + +/// A node in our AST +#[derive(PartialEq, Eq, Debug, Clone, Serialize)] +pub struct Node { + id: Id, + kind: KindId, + kind_name: &'static str, + pub(crate) fields: BTreeMap>, + pub(crate) content: NodeContent, + /// For synthetic nodes, the source range of the original node they + /// were desugared from. Used for location information in TRAP output. + #[serde(skip)] + source_range: Option, + is_named: bool, + is_missing: bool, + is_extra: bool, + is_error: bool, +} + +impl Node { + pub fn id(&self) -> Id { + self.id + } + + pub fn kind(&self) -> &'static str { + self.kind_name + } + + pub fn kind_name(&self) -> &'static str { + self.kind_name + } + + pub fn is_named(&self) -> bool { + self.is_named + } + + pub fn is_missing(&self) -> bool { + self.is_missing + } + + pub fn is_extra(&self) -> bool { + self.is_extra + } + + pub fn is_error(&self) -> bool { + self.is_error + } + + fn fake_point(&self) -> tree_sitter::Point { + tree_sitter::Point { row: 0, column: 0 } + } + + pub fn start_position(&self) -> tree_sitter::Point { + match self.content { + NodeContent::Range(range) => range.start_point, + _ => self + .source_range + .map_or_else(|| self.fake_point(), |r| r.start_point), + } + } + + pub fn end_position(&self) -> tree_sitter::Point { + match self.content { + NodeContent::Range(range) => range.end_point, + _ => self + .source_range + .map_or_else(|| self.fake_point(), |r| r.end_point), + } + } + + pub fn start_byte(&self) -> usize { + match self.content { + NodeContent::Range(range) => range.start_byte, + _ => self.source_range.map_or(0, |r| r.start_byte), + } + } + + pub fn end_byte(&self) -> usize { + match self.content { + NodeContent::Range(range) => range.end_byte, + _ => self.source_range.map_or(0, |r| r.end_byte), + } + } + + pub fn byte_range(&self) -> std::ops::Range { + self.start_byte()..self.end_byte() + } + + pub fn opt_string_content(&self) -> Option { + match &self.content { + NodeContent::Range(_range) => None, + NodeContent::String(s) => Some(s.to_string()), + NodeContent::DynamicString(s) => Some(s.to_string()), + } + } +} + +/// The contents of a node is either a range in the original source file, +/// or a new string if the node is synthesized. +#[derive(PartialEq, Eq, Debug, Clone, Serialize)] +pub enum NodeContent { + Range(#[serde(with = "range::Range")] tree_sitter::Range), + String(&'static str), + DynamicString(String), +} + +impl From<&'static str> for NodeContent { + fn from(value: &'static str) -> Self { + NodeContent::String(value) + } +} + +impl From for NodeContent { + fn from(value: tree_sitter::Range) -> Self { + NodeContent::Range(value) + } +} + +/// The transform function for a rule: takes the AST, captured variables, a +/// fresh-name scope, and the source range of the matched node, and returns +/// the IDs of the replacement nodes. +pub type Transform = Box< + dyn Fn(&mut Ast, Captures, &tree_builder::FreshScope, Option) -> Vec + + Send + + Sync, +>; + +pub struct Rule { + query: QueryNode, + transform: Transform, +} + +impl Rule { + pub fn new(query: QueryNode, transform: Transform) -> Self { + Self { query, transform } + } + + fn try_rule( + &self, + ast: &mut Ast, + node: Id, + fresh: &tree_builder::FreshScope, + ) -> Result>, String> { + let mut captures = Captures::new(); + if self.query.do_match(ast, node, &mut captures)? { + fresh.next_scope(); + let source_range = ast.get_node(node).and_then(|n| match n.content { + NodeContent::Range(r) => Some(r), + _ => n.source_range, + }); + Ok(Some((self.transform)(ast, captures, fresh, source_range))) + } else { + Ok(None) + } + } +} + +const MAX_REWRITE_DEPTH: usize = 100; + +/// Index of rules by their root query kind for fast lookup. +struct RuleIndex<'a> { + /// Rules indexed by root node kind name. + by_kind: BTreeMap<&'static str, Vec<&'a Rule>>, + /// Rules with wildcard queries (Any) that apply to all nodes. + wildcard: Vec<&'a Rule>, +} + +impl<'a> RuleIndex<'a> { + fn new(rules: &'a [Rule]) -> Self { + let mut by_kind: BTreeMap<&'static str, Vec<&'a Rule>> = BTreeMap::new(); + let mut wildcard = Vec::new(); + for rule in rules { + match rule.query.root_kind() { + Some(kind) => by_kind.entry(kind).or_default().push(rule), + None => wildcard.push(rule), + } + } + Self { by_kind, wildcard } + } + + fn rules_for_kind(&self, kind: &str) -> impl Iterator { + self.by_kind + .get(kind) + .into_iter() + .flat_map(|v| v.iter()) + .chain(self.wildcard.iter()) + } +} + +fn apply_rules( + rules: &[Rule], + ast: &mut Ast, + id: Id, + fresh: &tree_builder::FreshScope, +) -> Result, String> { + let index = RuleIndex::new(rules); + apply_rules_inner(&index, ast, id, fresh, 0) +} + +fn apply_rules_inner( + index: &RuleIndex, + ast: &mut Ast, + id: Id, + fresh: &tree_builder::FreshScope, + rewrite_depth: usize, +) -> Result, String> { + if rewrite_depth > MAX_REWRITE_DEPTH { + return Err(format!( + "Desugaring exceeded maximum rewrite depth ({MAX_REWRITE_DEPTH}). \ + This likely indicates a non-terminating rule cycle." + )); + } + + let node_kind = ast.get_node(id).map(|n| n.kind()).unwrap_or(""); + for rule in index.rules_for_kind(node_kind) { + if let Some(result_node) = rule.try_rule(ast, id, fresh)? { + let mut results = Vec::new(); + for node in result_node { + results.extend(apply_rules_inner( + index, + ast, + node, + fresh, + rewrite_depth + 1, + )?); + } + return Ok(results); + } + } + + // Collect fields before recursing (avoids borrowing ast immutably during mutation) + let field_entries: Vec<(FieldId, Vec)> = ast.nodes[id] + .fields + .iter() + .map(|(&fid, children)| (fid, children.clone())) + .collect(); + + // recursively descend into all the fields + // Child traversal does not increment rewrite depth + let mut changed = false; + let mut new_fields = BTreeMap::new(); + for (field_id, children) in field_entries { + let mut new_children = Vec::new(); + for child_id in children { + let result = apply_rules_inner(index, ast, child_id, fresh, rewrite_depth)?; + if result.len() != 1 || result[0] != child_id { + changed = true; + } + new_children.extend(result); + } + new_fields.insert(field_id, new_children); + } + + if !changed { + return Ok(vec![id]); + } + + let mut node = ast.nodes[id].clone(); + node.fields = new_fields; + node.id = ast.nodes.len(); + ast.nodes.push(node); + Ok(vec![ast.nodes.len() - 1]) +} + +/// Configuration for a desugaring pass: a set of rules and an optional +/// output node-types schema (in YAML format). +/// +/// When attached to a `LanguageSpec` (in the shared tree-sitter extractor), +/// enables yeast-based AST rewriting before TRAP extraction. The same YAML +/// is used both to validate TRAP output (via JSON conversion) and to +/// resolve output-only node kinds and fields at runtime. +pub struct DesugaringConfig { + /// Rules to apply during desugaring. + pub rules: Vec, + /// Output node-types in YAML format. If `None`, the input grammar's + /// node types are used (i.e. the desugared AST has the same node types + /// as the tree-sitter grammar). + pub output_node_types_yaml: Option<&'static str>, +} + +impl DesugaringConfig { + pub fn new(rules: Vec) -> Self { + Self { + rules, + output_node_types_yaml: None, + } + } + + pub fn with_output_node_types_yaml(mut self, yaml: &'static str) -> Self { + self.output_node_types_yaml = Some(yaml); + self + } + + /// Build the yeast `Schema` for this config, given the input language. + /// If `output_node_types_yaml` is `None`, returns the schema derived from + /// the input grammar. + pub fn build_schema(&self, language: &tree_sitter::Language) -> Result { + match self.output_node_types_yaml { + Some(yaml) => node_types_yaml::schema_from_yaml_with_language(yaml, language), + None => Ok(schema::Schema::from_language(language)), + } + } +} + +pub struct Runner<'a> { + language: tree_sitter::Language, + schema: schema::Schema, + rules: &'a [Rule], +} + +impl<'a> Runner<'a> { + /// Create a runner using the input grammar's schema for output. + pub fn new(language: tree_sitter::Language, rules: &'a [Rule]) -> Self { + let schema = schema::Schema::from_language(&language); + Self { + language, + schema, + rules, + } + } + + /// Create a runner with separate input language and output schema. + pub fn with_schema( + language: tree_sitter::Language, + schema: &schema::Schema, + rules: &'a [Rule], + ) -> Self { + Self { + language, + schema: schema.clone(), + rules, + } + } + + /// Create a runner from a [`DesugaringConfig`]. + pub fn from_config( + language: tree_sitter::Language, + config: &'a DesugaringConfig, + ) -> Result { + let schema = config.build_schema(&language)?; + Ok(Self { + language, + schema, + rules: &config.rules, + }) + } + + pub fn run_from_tree(&self, tree: &tree_sitter::Tree) -> Result { + let fresh = tree_builder::FreshScope::new(); + let mut ast = Ast::from_tree_with_schema(self.schema.clone(), tree, &self.language); + let root = ast.get_root(); + let res = apply_rules(self.rules, &mut ast, root, &fresh)?; + if res.len() != 1 { + return Err(format!( + "Expected exactly one result node, got {}", + res.len() + )); + } + ast.set_root(res[0]); + Ok(ast) + } + + pub fn run(&self, input: &str) -> Result { + let fresh = tree_builder::FreshScope::new(); + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&self.language) + .map_err(|e| format!("Failed to set language: {e}"))?; + let tree = parser + .parse(input, None) + .ok_or_else(|| "Failed to parse input".to_string())?; + let mut ast = Ast::from_tree_with_schema(self.schema.clone(), &tree, &self.language); + let root = ast.get_root(); + let res = apply_rules(self.rules, &mut ast, root, &fresh)?; + if res.len() != 1 { + return Err(format!( + "Expected exactly one result node, got {}", + res.len() + )); + } + ast.set_root(res[0]); + Ok(ast) + } +} diff --git a/shared/yeast/src/query.rs b/shared/yeast/src/query.rs new file mode 100644 index 00000000000..223b3456919 --- /dev/null +++ b/shared/yeast/src/query.rs @@ -0,0 +1,228 @@ +use crate::{captures::Captures, Ast, Id}; + +#[derive(Debug, Clone)] +pub enum QueryNode { + Any(), + Node { + kind: &'static str, + children: Vec<(&'static str, Vec)>, + }, + UnnamedNode { + kind: &'static str, + }, + Capture { + capture: &'static str, + node: Box, + }, +} + +impl QueryNode { + /// Returns the root node kind this query matches, if it's specific. + /// Returns None for wildcards (Any) and captures wrapping wildcards. + pub fn root_kind(&self) -> Option<&'static str> { + match self { + QueryNode::Node { kind, .. } => Some(kind), + QueryNode::UnnamedNode { kind } => Some(kind), + QueryNode::Capture { node, .. } => node.root_kind(), + QueryNode::Any() => None, + } + } +} + +#[derive(Debug, Clone)] +pub enum QueryListElem { + Repeated { + children: Vec, + rep: Rep, + }, + SingleNode(QueryNode), +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum Rep { + ZeroOrMore, + OneOrMore, + ZeroOrOne, +} + +impl QueryNode { + /// Returns true if this query only matches named nodes (not unnamed tokens). + /// Used to skip unnamed children in positional matching, matching tree-sitter + /// semantics where `(_)` only matches named nodes. + fn matches_named_only(&self) -> bool { + match self { + QueryNode::Any() => true, + QueryNode::Node { .. } => true, + QueryNode::UnnamedNode { .. } => false, + QueryNode::Capture { node, .. } => node.matches_named_only(), + } + } + + pub fn do_match(&self, ast: &Ast, node: Id, matches: &mut Captures) -> Result { + match self { + QueryNode::Any() => Ok(true), + QueryNode::Node { kind, children } => { + let node = ast.get_node(node).unwrap(); + let target_kind = ast + .id_for_node_kind(kind) + .ok_or_else(|| format!("Node kind {kind} not found in language"))?; + if node.kind != target_kind { + return Ok(false); + } + for (field, field_children) in children { + let field_id = ast + .field_id_for_name(field) + .ok_or_else(|| format!("Field {field} not found in language"))?; + let empty = Vec::new(); + let mut child_iter = + node.fields.get(&field_id).unwrap_or(&empty).iter().cloned(); + if !match_children(field_children.iter(), ast, &mut child_iter, matches)? { + return Ok(false); + } + } + Ok(true) + } + QueryNode::UnnamedNode { kind } => { + let node = ast.get_node(node).unwrap(); + let target_kind = ast + .id_for_unnamed_node_kind(kind) + .ok_or_else(|| format!("unnamed Node kind {kind} not found in language"))?; + Ok(node.kind == target_kind) + } + QueryNode::Capture { + capture, + node: sub_query, + } => { + let matched = sub_query.do_match(ast, node, matches)?; + if matched { + matches.insert(capture, node); + } + Ok(matched) + } + } + } +} + +fn match_children<'a>( + child_matchers: impl Iterator, + ast: &Ast, + remaining_children: &mut (impl Iterator + Clone), + matches: &mut Captures, +) -> Result { + for child in child_matchers { + if !child.do_match(ast, remaining_children, matches)? { + return Ok(false); + } + } + Ok(true) +} + +impl QueryListElem { + fn do_match( + &self, + ast: &Ast, + remaining_children: &mut (impl Iterator + Clone), + matches: &mut Captures, + ) -> Result { + match self { + QueryListElem::Repeated { children, rep } => { + if children.is_empty() { + // Empty repetition always succeeds without consuming + return Ok(*rep != Rep::OneOrMore); + } + + let mut iters = 0; + + loop { + let matches_initial = matches.clone(); + let start = remaining_children.clone(); + let start_next = start.clone().next(); + if !match_children(children.iter(), ast, remaining_children, matches)? { + *remaining_children = start; + *matches = matches_initial; + break; + } + // Guard against zero-width matches: if the iterator + // didn't advance, break to avoid infinite looping. + let current_next = remaining_children.clone().next(); + if start_next == current_next { + break; + } + iters += 1; + if *rep == Rep::ZeroOrOne { + break; + } + } + if *rep == Rep::OneOrMore && iters == 0 { + // We didn't match any children but we were supposed to + Ok(false) + } else { + Ok(true) + } + } + QueryListElem::SingleNode(sub_query) => { + if sub_query.matches_named_only() { + // Skip unnamed children, matching tree-sitter semantics + // where (_) only matches named nodes. + loop { + match remaining_children.next() { + Some(child) => { + let node = ast.get_node(child).unwrap(); + if node.is_named() { + return sub_query.do_match(ast, child, matches); + } + // Skip unnamed child, continue to next + } + None => return Ok(false), + } + } + } else if let Some(child) = remaining_children.next() { + sub_query.do_match(ast, child, matches) + } else { + Ok(false) + } + } + } + } +} + +#[cfg(test)] +mod tests { + use crate::query::*; + #[test] + fn it_works() { + let query1: QueryNode = yeast::query!((_)); + println!("{query1:?}"); + let query2 = yeast::query!((foo)); + println!("{query2:?}"); + let query3 = yeast::query!((foo child: (_))); + println!("{query3:?}"); + let query4 = yeast::query!((foo (_)*)); + println!("{query4:?}"); + let query5: QueryNode = yeast::query!((foo (_)*)); + println!("{query5:?}"); + let query6: QueryNode = yeast::query!((_) @bar); + println!("{query6:?}"); + let query7: QueryNode = yeast::query!((foo child: (_) @bar)); + println!("{query7:?}"); + let query8: QueryNode = yeast::query!( + (assignment + left: (element_reference + object: (_) @obj + (_) @index + ) + right: (_) @rhs + ) + ); + println!("{query8:?}"); + let query9 = yeast::query!( + (program + child: (assignment + left: (_) @left + right: (_) @right + ) + ) + ); + println!("{query9:?}"); + } +} diff --git a/shared/yeast/src/range.rs b/shared/yeast/src/range.rs new file mode 100644 index 00000000000..ec670b438d5 --- /dev/null +++ b/shared/yeast/src/range.rs @@ -0,0 +1,21 @@ +//! (de)-serialize helpers for tree_sitter::Range + +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize)] +#[serde(remote = "tree_sitter::Point")] +pub struct Point { + pub row: usize, + pub column: usize, +} + +#[derive(Serialize, Deserialize)] +#[serde(remote = "tree_sitter::Range")] +pub struct Range { + pub start_byte: usize, + pub end_byte: usize, + #[serde(with = "Point")] + pub start_point: tree_sitter::Point, + #[serde(with = "Point")] + pub end_point: tree_sitter::Point, +} diff --git a/shared/yeast/src/schema.rs b/shared/yeast/src/schema.rs new file mode 100644 index 00000000000..0a33fd6e0ed --- /dev/null +++ b/shared/yeast/src/schema.rs @@ -0,0 +1,167 @@ +use std::collections::BTreeMap; + +use crate::{FieldId, KindId, CHILD_FIELD}; + +/// A schema defining node kinds and field names for the output AST. +/// Built from a node-types.yml file, independent of any tree-sitter grammar. +/// +/// # Memory management +/// +/// `register_field`/`register_kind`/`register_unnamed_kind` use `Box::leak` +/// to obtain `&'static str` names. This is intentional: the `&'static str` +/// names appear pervasively in `Node`, `AstCursor`, query patterns, and the +/// extractor's TRAP output, where adding a lifetime would propagate widely. +/// +/// The leak is bounded by the number of distinct kind/field names registered. +/// Schemas are expected to be constructed once per process (e.g. at extractor +/// startup) and reused. Repeated construction in long-running processes will +/// leak memory unboundedly and should be avoided. +#[derive(Clone)] +pub struct Schema { + field_ids: BTreeMap, + field_names: BTreeMap, + next_field_id: FieldId, + kind_ids: BTreeMap, + unnamed_kind_ids: BTreeMap, + kind_names: BTreeMap, + next_kind_id: KindId, +} + +impl Default for Schema { + fn default() -> Self { + Self::new() + } +} + +impl Schema { + pub fn new() -> Self { + Self { + field_ids: BTreeMap::new(), + field_names: BTreeMap::new(), + next_field_id: 1, // 0 is reserved + kind_ids: BTreeMap::new(), + unnamed_kind_ids: BTreeMap::new(), + kind_names: BTreeMap::new(), + next_kind_id: 1, // 0 is reserved + } + } + + /// Create a schema from a tree-sitter language, importing all its + /// known field and kind names. + pub fn from_language(language: &tree_sitter::Language) -> Self { + let mut schema = Self::new(); + // Import all field names, preserving tree-sitter's IDs + for id in 1..=language.field_count() as u16 { + if let Some(name) = language.field_name_for_id(id) { + schema.field_ids.insert(name.to_string(), id); + schema.field_names.insert(id, name); + if id >= schema.next_field_id { + schema.next_field_id = id + 1; + } + } + } + // Import all node kind names, preserving tree-sitter's IDs. + // Track named and unnamed variants separately. + // For named kinds, use the canonical ID from id_for_node_kind(name, true) + // since some languages have multiple IDs for the same named kind. + for id in 0..language.node_kind_count() as u16 { + if let Some(name) = language.node_kind_for_id(id) { + if !name.is_empty() { + let is_named = language.node_kind_is_named(id); + if is_named { + let canonical_id = language.id_for_node_kind(name, true); + if canonical_id != 0 && !schema.kind_ids.contains_key(name) { + schema.kind_ids.insert(name.to_string(), canonical_id); + schema.kind_names.insert(canonical_id, name); + } + } else { + // For unnamed kinds, only insert if we don't already have one + // (some languages have multiple unnamed IDs for the same text) + schema + .unnamed_kind_ids + .entry(name.to_string()) + .or_insert(id); + } + // Always track the name for any ID we encounter + schema.kind_names.entry(id).or_insert(name); + if id >= schema.next_kind_id { + schema.next_kind_id = id + 1; + } + } + } + } + schema + } + + /// Register a field name, returning its ID. + /// If already registered, returns the existing ID. + pub fn register_field(&mut self, name: &str) -> FieldId { + if name == "child" { + return CHILD_FIELD; + } + if let Some(&id) = self.field_ids.get(name) { + return id; + } + let id = self.next_field_id; + assert!(id < CHILD_FIELD, "too many fields"); + self.next_field_id += 1; + let leaked: &'static str = Box::leak(name.to_string().into_boxed_str()); + self.field_ids.insert(name.to_string(), id); + self.field_names.insert(id, leaked); + id + } + + /// Register a named node kind name, returning its ID. + /// If already registered, returns the existing ID. + pub fn register_kind(&mut self, name: &str) -> KindId { + if let Some(&id) = self.kind_ids.get(name) { + return id; + } + let id = self.next_kind_id; + self.next_kind_id += 1; + let leaked: &'static str = Box::leak(name.to_string().into_boxed_str()); + self.kind_ids.insert(name.to_string(), id); + self.kind_names.insert(id, leaked); + id + } + + /// Register an unnamed token kind (e.g. `"="`, `"end"`), returning its ID. + /// If already registered, returns the existing ID. + pub fn register_unnamed_kind(&mut self, name: &str) -> KindId { + if let Some(&id) = self.unnamed_kind_ids.get(name) { + return id; + } + let id = self.next_kind_id; + self.next_kind_id += 1; + let leaked: &'static str = Box::leak(name.to_string().into_boxed_str()); + self.unnamed_kind_ids.insert(name.to_string(), id); + self.kind_names.insert(id, leaked); + id + } + + pub fn field_id_for_name(&self, name: &str) -> Option { + if name == "child" { + return Some(CHILD_FIELD); + } + self.field_ids.get(name).copied() + } + + pub fn field_name_for_id(&self, id: FieldId) -> Option<&'static str> { + if id == CHILD_FIELD { + return Some("child"); + } + self.field_names.get(&id).copied() + } + + pub fn id_for_node_kind(&self, kind: &str) -> Option { + self.kind_ids.get(kind).copied() + } + + pub fn id_for_unnamed_node_kind(&self, kind: &str) -> Option { + self.unnamed_kind_ids.get(kind).copied() + } + + pub fn node_kind_for_id(&self, id: KindId) -> Option<&'static str> { + self.kind_names.get(&id).copied() + } +} diff --git a/shared/yeast/src/tree_builder.rs b/shared/yeast/src/tree_builder.rs new file mode 100644 index 00000000000..c735c272d28 --- /dev/null +++ b/shared/yeast/src/tree_builder.rs @@ -0,0 +1,43 @@ +use std::cell::Cell; +use std::collections::BTreeMap; + +/// Tracks fresh identifier generation during a single tree-building operation. +/// All occurrences of the same `$name` within one build share the same generated value. +pub struct FreshScope { + counter: Cell, + resolved: std::cell::RefCell>, +} + +impl Default for FreshScope { + fn default() -> Self { + Self::new() + } +} + +impl FreshScope { + pub fn new() -> Self { + Self { + counter: Cell::new(0), + resolved: std::cell::RefCell::new(BTreeMap::new()), + } + } + + pub fn resolve(&self, name: &str) -> String { + self.resolved + .borrow_mut() + .entry(name.to_string()) + .or_insert_with(|| { + let id = self.counter.get(); + self.counter.set(id + 1); + format!("${name}-{id}") + }) + .clone() + } + + /// Clear resolved names but keep the counter. Called between rule + /// applications so that `$tmp` in different rules gets different values + /// while the counter increases monotonically. + pub fn next_scope(&self) { + self.resolved.borrow_mut().clear(); + } +} diff --git a/shared/yeast/src/visitor.rs b/shared/yeast/src/visitor.rs new file mode 100644 index 00000000000..655aa01e6b3 --- /dev/null +++ b/shared/yeast/src/visitor.rs @@ -0,0 +1,111 @@ +use std::collections::BTreeMap; +use tree_sitter::{Language, Tree}; + +use crate::{Ast, Id, Node, NodeContent, CHILD_FIELD}; + +#[derive(Debug)] +struct VisitorNode { + inner: Node, + parent: Option, +} + +/// A type that can walk a TS tree and produce an `Ast`. +#[derive(Debug)] +pub(crate) struct Visitor { + nodes: Vec, + current: Option, + language: Language, +} + +impl Visitor { + pub fn new(language: Language) -> Self { + Self { + nodes: Vec::new(), + current: None, + language, + } + } + + pub fn visit(&mut self, tree: &Tree) { + let cursor = &mut tree.walk(); + self.enter_node(cursor.node()); + let mut recurse = true; + loop { + if recurse && cursor.goto_first_child() { + recurse = self.enter_node(cursor.node()); + } else { + self.leave_node(cursor.field_name(), cursor.node()); + + if cursor.goto_next_sibling() { + recurse = self.enter_node(cursor.node()); + } else if cursor.goto_parent() { + recurse = false; + } else { + break; + } + } + } + } + + pub fn build_with_schema(self, schema: crate::schema::Schema) -> Ast { + Ast { + root: self.nodes[0].inner.id, + schema, + nodes: self.nodes.into_iter().map(|n| n.inner).collect(), + } + } + + fn add_node(&mut self, n: tree_sitter::Node<'_>, content: NodeContent, is_named: bool) -> Id { + let id = self.nodes.len(); + self.nodes.push(VisitorNode { + inner: Node { + id, + kind: self.language.id_for_node_kind(n.kind(), is_named), + kind_name: n.kind(), + content, + fields: BTreeMap::new(), + is_missing: n.is_missing(), + is_named: n.is_named(), + is_extra: n.is_extra(), + is_error: n.is_error(), + source_range: None, + }, + parent: self.current, + }); + id + } + + fn enter_node(&mut self, node: tree_sitter::Node<'_>) -> bool { + let id = self.add_node(node, node.range().into(), node.is_named()); + self.current = Some(id); + true + } + + fn leave_node(&mut self, field_name: Option<&'static str>, _node: tree_sitter::Node<'_>) { + let node = self.current.map(|i| &self.nodes[i]).unwrap(); + let node_id = node.inner.id; + let node_parent = node.parent; + + if let Some(parent_id) = node.parent { + let parent = self.nodes.get_mut(parent_id).unwrap(); + if let Some(field) = field_name { + let field_id = self.language.field_id_for_name(field).unwrap().get(); + parent + .inner + .fields + .entry(field_id) + .or_default() + .push(node_id); + } else { + parent + .inner + .fields + .entry(CHILD_FIELD) + .or_default() + .push(node_id); + } + } + + self.current = node_parent; + } +} From 8a9e53cc58041e34f844c00391421bc962de405e Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:13:27 +0000 Subject: [PATCH 04/11] yeast: Add YAML node-types format and converter Human-friendly YAML alternative to tree-sitter node-types.json with three sections: supertypes, named, unnamed. Supports bidirectional conversion and building Schema objects from YAML. Includes CLI binary (node_types_yaml) and documentation. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- shared/yeast/doc/node-types-yaml.md | 241 ++++++++ shared/yeast/src/bin/node_types_yaml.rs | 51 ++ shared/yeast/src/node_types_yaml.rs | 722 ++++++++++++++++++++++++ 3 files changed, 1014 insertions(+) create mode 100644 shared/yeast/doc/node-types-yaml.md create mode 100644 shared/yeast/src/bin/node_types_yaml.rs create mode 100644 shared/yeast/src/node_types_yaml.rs diff --git a/shared/yeast/doc/node-types-yaml.md b/shared/yeast/doc/node-types-yaml.md new file mode 100644 index 00000000000..b887f5a82bb --- /dev/null +++ b/shared/yeast/doc/node-types-yaml.md @@ -0,0 +1,241 @@ +# YAML Node Types Format + +The YAML node-types format is a human-friendly alternative to tree-sitter's +`node-types.json`. It can be converted to and from JSON using the +`node_types_yaml` tool. + +## Overview + +A YAML node-types file has three top-level sections: + +```yaml +supertypes: + # Abstract union types + +named: + # Concrete AST nodes and leaf tokens + +unnamed: + # Punctuation and keyword tokens +``` + +All three sections are optional. If omitted, they default to empty. + +## Supertypes + +Supertypes are abstract groupings of node types (unions). Each supertype maps +to a list of its members: + +```yaml +supertypes: + _expression: + - assignment + - binary + - identifier + - call +``` + +This corresponds to the following JSON: + +```json +{ + "type": "_expression", + "named": true, + "subtypes": [ + { "type": "assignment", "named": true }, + { "type": "binary", "named": true }, + { "type": "identifier", "named": true }, + { "type": "call", "named": true } + ] +} +``` + +Members are resolved as named or unnamed using the +[type reference rules](#type-references) described below. + +## Named nodes + +Named nodes are concrete AST node types. Each entry is a node kind mapping to +its fields. A node with no fields (a leaf token like `identifier`) uses an +empty value: + +```yaml +named: + identifier: + constant: +``` + +```json +{"type": "identifier", "named": true, "fields": {}}, +{"type": "constant", "named": true, "fields": {}} +``` + +### Fields + +Each field has a name, a multiplicity suffix, and a list of allowed types. + +| Suffix | Meaning | JSON `multiple` | JSON `required` | +| ------ | ------------ | --------------- | --------------- | +| (none) | exactly one | `false` | `true` | +| `?` | zero or one | `false` | `false` | +| `+` | one or more | `true` | `true` | +| `*` | zero or more | `true` | `false` | + +Example: + +```yaml +named: + assignment: + left: _lhs + right: _expression +``` + +```json +{ + "type": "assignment", + "named": true, + "fields": { + "left": { + "multiple": false, + "required": true, + "types": [{ "type": "_lhs", "named": true }] + }, + "right": { + "multiple": false, + "required": true, + "types": [{ "type": "_expression", "named": true }] + } + } +} +``` + +A field with multiple allowed types uses a list: + +```yaml +named: + binary: + left: [_expression, _simple_numeric] + operator: ["!=", "+", "&&"] + right: _expression +``` + +A singleton list can be written as a bare value (as shown with `right` above). + +### Unnamed children + +Unnamed children (nodes that appear as children without a field name) are +specified using the special `$children` field name, with the same suffixes: + +```yaml +named: + argument_list: + $children*: [_expression, block_argument, splat_argument] +``` + +```json +{ + "type": "argument_list", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": false, + "types": [ + { "type": "_expression", "named": true }, + { "type": "block_argument", "named": true }, + { "type": "splat_argument", "named": true } + ] + } +} +``` + +## Unnamed tokens + +Unnamed tokens are punctuation, operators, and keywords that appear in the +parse tree but don't have their own AST node type. They are listed as simple +strings: + +```yaml +unnamed: + - "=" + - "end" + - "+" + - "&&" +``` + +```json +{"type": "=", "named": false}, +{"type": "end", "named": false}, +{"type": "+", "named": false}, +{"type": "&&", "named": false} +``` + +When converting to YAML, unnamed tokens are always wrapped in quotes for +visual clarity. This is purely cosmetic — YAML treats `end` and `"end"` as +the same string. + +## Type references + +When a type name appears in a field's type list or a supertype's member list, +it needs to be resolved as either named or unnamed. The rules are: + +1. If the name only appears in `named` or `supertypes`, it is **named**. +2. If the name only appears in `unnamed`, it is **unnamed**. +3. If the name appears in both, it defaults to **named**. +4. To explicitly reference an unnamed type in the ambiguous case, use the + map form: + +```yaml +named: + example: + field: { unnamed: foo } +``` + +In practice, ambiguity is rare — names like `end`, `+`, `if` are almost +always only unnamed, while names like `identifier`, `assignment` are only +named. + +## Complete example + +```yaml +supertypes: + _expression: + - assignment + - binary + - identifier + +named: + assignment: + left: _expression + right?: _expression + binary: + left: [_expression, _simple_numeric] + operator: ["!=", "+"] + right: _expression + argument_list: + $children*: [_expression, block_argument] + identifier: + constant: + +unnamed: + - "!=" + - "+" + - "=" + - "end" +``` + +## CLI usage + +Convert YAML to JSON: + +``` +node_types_yaml input.yaml > node-types.json +``` + +Convert JSON to YAML: + +``` +node_types_yaml --from-json node-types.json > node-types.yaml +``` + +Both commands also accept input from stdin if no file argument is given. diff --git a/shared/yeast/src/bin/node_types_yaml.rs b/shared/yeast/src/bin/node_types_yaml.rs new file mode 100644 index 00000000000..bc392ecb1f6 --- /dev/null +++ b/shared/yeast/src/bin/node_types_yaml.rs @@ -0,0 +1,51 @@ +use clap::Parser; +use std::io::Read; + +#[derive(Parser)] +#[clap( + name = "node-types-yaml", + about = "Convert between YAML and JSON node-types formats" +)] +struct Cli { + /// Input file (reads from stdin if not provided) + input: Option, + + /// Convert from JSON to YAML (default is YAML to JSON) + #[arg(long)] + from_json: bool, +} + +fn main() { + let args = Cli::parse(); + + let input = match &args.input { + Some(path) => std::fs::read_to_string(path).unwrap_or_else(|e| { + eprintln!("Error reading {path}: {e}"); + std::process::exit(1); + }), + None => { + let mut buf = String::new(); + std::io::stdin() + .read_to_string(&mut buf) + .unwrap_or_else(|e| { + eprintln!("Error reading stdin: {e}"); + std::process::exit(1); + }); + buf + } + }; + + let result = if args.from_json { + yeast::node_types_yaml::convert_from_json(&input) + } else { + yeast::node_types_yaml::convert(&input) + }; + + match result { + Ok(output) => print!("{output}"), + Err(e) => { + eprintln!("Error: {e}"); + std::process::exit(1); + } + } +} diff --git a/shared/yeast/src/node_types_yaml.rs b/shared/yeast/src/node_types_yaml.rs new file mode 100644 index 00000000000..d321ba8a2cf --- /dev/null +++ b/shared/yeast/src/node_types_yaml.rs @@ -0,0 +1,722 @@ +/// Converts a YAML node-types file to the tree-sitter `node-types.json` format. +/// +/// # YAML format +/// +/// ```yaml +/// supertypes: +/// _expression: +/// - assignment +/// - binary +/// +/// named: +/// assignment: +/// left: _lhs +/// right: _expression +/// identifier: +/// +/// unnamed: +/// - "+" +/// - "end" +/// ``` +/// +/// See the crate-level docs for the full format specification. +use std::collections::{BTreeMap, BTreeSet}; +use std::fmt::Write; + +use serde::Deserialize; +use serde_json::json; + +/// Top-level YAML structure. +#[derive(Deserialize, Default)] +struct YamlNodeTypes { + #[serde(default)] + supertypes: BTreeMap>, + #[serde(default)] + named: BTreeMap>>, + #[serde(default)] + unnamed: Vec, +} + +/// A reference to a node type. Can be: +/// - a plain string (resolved by looking up named vs unnamed) +/// - a map `{unnamed: "name"}` to force unnamed interpretation +#[derive(Deserialize, Debug, Clone)] +#[serde(untagged)] +enum TypeRef { + Name(String), + Explicit { unnamed: String }, +} + +/// A field value: either a single type ref or a list of them. +#[derive(Deserialize, Debug, Clone)] +#[serde(untagged)] +enum TypeRefOrList { + Single(TypeRef), + List(Vec), +} + +impl TypeRefOrList { + fn into_vec(self) -> Vec { + match self { + TypeRefOrList::Single(t) => vec![t], + TypeRefOrList::List(v) => v, + } + } +} + +/// Parsed field name: base name + multiplicity markers. +struct FieldSpec { + name: Option, // None for $children + multiple: bool, + required: bool, +} + +fn parse_field_name(raw: &str) -> FieldSpec { + let is_children = + raw == "$children" || raw == "$children?" || raw == "$children*" || raw == "$children+"; + + let suffix = raw.chars().last().filter(|c| matches!(c, '?' | '*' | '+')); + + let (multiple, required) = match suffix { + Some('?') => (false, false), + Some('*') => (true, false), + Some('+') => (true, true), + _ => (false, true), // bare field name = required, single + }; + + let name = if is_children { + None + } else { + let base = raw.trim_end_matches(['?', '*', '+']); + Some(base.to_string()) + }; + + FieldSpec { + name, + multiple, + required, + } +} + +/// Resolve a TypeRef to a (type, named) pair, given the sets of known named +/// and unnamed types. +fn resolve_type_ref( + type_ref: &TypeRef, + named_types: &BTreeSet, + unnamed_types: &BTreeSet, +) -> serde_json::Value { + match type_ref { + TypeRef::Explicit { unnamed } => { + json!({"type": unnamed, "named": false}) + } + TypeRef::Name(name) => { + let is_named = named_types.contains(name); + let is_unnamed = unnamed_types.contains(name); + + if is_named && is_unnamed { + // Ambiguous: default to named + json!({"type": name, "named": true}) + } else if is_unnamed { + json!({"type": name, "named": false}) + } else { + // Named, or unknown (assume named) + json!({"type": name, "named": true}) + } + } + } +} + +/// Convert YAML string to node-types JSON string. +pub fn convert(yaml_input: &str) -> Result { + let yaml: YamlNodeTypes = + serde_yaml::from_str(yaml_input).map_err(|e| format!("Failed to parse YAML: {e}"))?; + + // Build the sets of known named and unnamed types for resolution. + let mut named_types = BTreeSet::new(); + for name in yaml.supertypes.keys() { + named_types.insert(name.clone()); + } + for name in yaml.named.keys() { + named_types.insert(name.clone()); + } + let unnamed_types: BTreeSet = yaml.unnamed.iter().cloned().collect(); + + let mut output = Vec::new(); + + // 1. Supertypes + for (name, members) in &yaml.supertypes { + let subtypes: Vec<_> = members + .iter() + .map(|m| resolve_type_ref(m, &named_types, &unnamed_types)) + .collect(); + output.push(json!({ + "type": name, + "named": true, + "subtypes": subtypes, + })); + } + + // 2. Named nodes + for (name, fields_opt) in &yaml.named { + let fields_map = match fields_opt { + None => { + // Leaf token: no fields, no children, no subtypes + output.push(json!({ + "type": name, + "named": true, + "fields": {}, + })); + continue; + } + Some(m) if m.is_empty() => { + output.push(json!({ + "type": name, + "named": true, + "fields": {}, + })); + continue; + } + Some(m) => m, + }; + + let mut json_fields = serde_json::Map::new(); + let mut json_children: Option = None; + + for (raw_field_name, type_refs) in fields_map { + let spec = parse_field_name(raw_field_name); + let types: Vec<_> = type_refs + .clone() + .into_vec() + .iter() + .map(|t| resolve_type_ref(t, &named_types, &unnamed_types)) + .collect(); + + // Cloning to make the borrow checker happy + let field_info = json!({ + "multiple": spec.multiple, + "required": spec.required, + "types": types, + }); + + if spec.name.is_none() { + // $children + json_children = Some(field_info); + } else { + json_fields.insert(spec.name.unwrap(), field_info); + } + } + + let mut entry = json!({ + "type": name, + "named": true, + "fields": json_fields, + }); + + if let Some(children) = json_children { + entry + .as_object_mut() + .unwrap() + .insert("children".to_string(), children); + } + + output.push(entry); + } + + // 3. Unnamed tokens + for name in &yaml.unnamed { + output.push(json!({ + "type": name, + "named": false, + })); + } + + serde_json::to_string_pretty(&output).map_err(|e| format!("Failed to serialize JSON: {e}")) +} + +/// Build a Schema from a YAML node-types string. +/// Registers all node kinds and field names found in the YAML. +pub fn schema_from_yaml(yaml_input: &str) -> Result { + let yaml: YamlNodeTypes = + serde_yaml::from_str(yaml_input).map_err(|e| format!("Failed to parse YAML: {e}"))?; + + let mut schema = crate::schema::Schema::new(); + + // Register all supertypes as node kinds + for name in yaml.supertypes.keys() { + schema.register_kind(name); + } + + // Register named node kinds and their fields + for (name, fields_opt) in &yaml.named { + schema.register_kind(name); + if let Some(fields) = fields_opt { + for raw_field_name in fields.keys() { + let spec = parse_field_name(raw_field_name); + if let Some(field_name) = &spec.name { + schema.register_field(field_name); + } + } + } + } + + // Register unnamed tokens as node kinds + for name in &yaml.unnamed { + schema.register_unnamed_kind(name); + } + + Ok(schema) +} + +/// Build a Schema from a YAML string, extending a tree-sitter Language. +/// The Schema inherits all field/kind names from the Language, plus any +/// additional ones defined in the YAML. +pub fn schema_from_yaml_with_language( + yaml_input: &str, + language: &tree_sitter::Language, +) -> Result { + let yaml: YamlNodeTypes = + serde_yaml::from_str(yaml_input).map_err(|e| format!("Failed to parse YAML: {e}"))?; + + let mut schema = crate::schema::Schema::from_language(language); + + // Register supertypes + for name in yaml.supertypes.keys() { + schema.register_kind(name); + } + + // Register named node kinds and their fields + for (name, fields_opt) in &yaml.named { + schema.register_kind(name); + if let Some(fields) = fields_opt { + for raw_field_name in fields.keys() { + let spec = parse_field_name(raw_field_name); + if let Some(field_name) = &spec.name { + schema.register_field(field_name); + } + } + } + } + + // Register unnamed tokens + for name in &yaml.unnamed { + schema.register_unnamed_kind(name); + } + + Ok(schema) +} + +// --------------------------------------------------------------------------- +// JSON → YAML conversion +// --------------------------------------------------------------------------- + +/// JSON node-types structures (mirrors tree-sitter's format). +#[derive(Deserialize)] +struct JsonNodeInfo { + #[serde(rename = "type")] + kind: String, + named: bool, + #[serde(default)] + fields: BTreeMap, + children: Option, + #[serde(default)] + subtypes: Vec, +} + +#[derive(Deserialize)] +struct JsonNodeType { + #[serde(rename = "type")] + kind: String, + named: bool, +} + +#[derive(Deserialize)] +struct JsonFieldInfo { + multiple: bool, + required: bool, + types: Vec, +} + +/// Convert a tree-sitter node-types.json string to the YAML format. +pub fn convert_from_json(json_input: &str) -> Result { + let nodes: Vec = + serde_json::from_str(json_input).map_err(|e| format!("Failed to parse JSON: {e}"))?; + + // Collect all named and unnamed types for disambiguation decisions. + let mut all_named: BTreeSet = BTreeSet::new(); + let mut all_unnamed: BTreeSet = BTreeSet::new(); + for node in &nodes { + if node.named { + all_named.insert(node.kind.clone()); + } else { + all_unnamed.insert(node.kind.clone()); + } + } + + let mut supertypes: BTreeMap> = BTreeMap::new(); + let mut named: BTreeMap>> = BTreeMap::new(); + let mut unnamed: Vec = Vec::new(); + + for node in nodes { + if !node.named { + unnamed.push(node.kind); + continue; + } + + if !node.subtypes.is_empty() { + supertypes.insert(node.kind, node.subtypes); + continue; + } + + if node.fields.is_empty() && node.children.is_none() { + // Leaf token + named.insert(node.kind, None); + } else { + let mut fields = BTreeMap::new(); + for (name, info) in node.fields { + fields.insert(name, info); + } + if let Some(children) = node.children { + fields.insert("$children".to_string(), children); + } + named.insert(node.kind, Some(fields)); + } + } + + // Now emit YAML + let mut out = String::new(); + + // Supertypes + if !supertypes.is_empty() { + writeln!(out, "supertypes:").unwrap(); + for (name, members) in &supertypes { + writeln!(out, " {name}:").unwrap(); + for member in members { + let ref_str = format_type_ref(&member.kind, member.named, &all_named, &all_unnamed); + writeln!(out, " - {ref_str}").unwrap(); + } + } + writeln!(out).unwrap(); + } + + // Named + if !named.is_empty() { + writeln!(out, "named:").unwrap(); + for (name, fields_opt) in &named { + match fields_opt { + None => { + writeln!(out, " {name}:").unwrap(); + } + Some(fields) => { + writeln!(out, " {name}:").unwrap(); + for (field_name, info) in fields { + let suffix = field_suffix(info.multiple, info.required); + let yaml_name = if field_name == "$children" { + format!("$children{suffix}") + } else { + format!("{field_name}{suffix}") + }; + + let type_refs: Vec = info + .types + .iter() + .map(|t| format_type_ref(&t.kind, t.named, &all_named, &all_unnamed)) + .collect(); + + if type_refs.len() == 1 { + writeln!(out, " {yaml_name}: {}", type_refs[0]).unwrap(); + } else { + let list = type_refs + .iter() + .map(|s| s.as_str()) + .collect::>() + .join(", "); + writeln!(out, " {yaml_name}: [{list}]").unwrap(); + } + } + } + } + } + writeln!(out).unwrap(); + } + + // Unnamed + if !unnamed.is_empty() { + writeln!(out, "unnamed:").unwrap(); + for name in &unnamed { + writeln!(out, " - {}", force_quote(name)).unwrap(); + } + } + + Ok(out) +} + +fn field_suffix(multiple: bool, required: bool) -> &'static str { + match (multiple, required) { + (false, true) => "", + (false, false) => "?", + (true, true) => "+", + (true, false) => "*", + } +} + +/// Format a type reference for YAML output. Uses the disambiguation rule: +/// plain string if unambiguous, `{unnamed: name}` if the name exists as both +/// named and unnamed and we need the unnamed interpretation. +fn format_type_ref( + kind: &str, + named: bool, + all_named: &BTreeSet, + _all_unnamed: &BTreeSet, +) -> String { + if named { + quote_yaml(kind) + } else { + let is_also_named = all_named.contains(kind); + if is_also_named { + format!("{{unnamed: {}}}", force_quote(kind)) + } else { + force_quote(kind) + } + } +} + +/// Always wrap in double quotes. Used for unnamed node references so they're +/// visually distinct from named ones — YAML treats both forms as equivalent strings. +fn force_quote(s: &str) -> String { + format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")) +} + +/// Quote a YAML string value if it contains special characters or could be +/// misinterpreted. +fn quote_yaml(s: &str) -> String { + let needs_quoting = s.is_empty() + || s.contains(|c: char| { + matches!( + c, + ':' | '{' + | '}' + | '[' + | ']' + | ',' + | '&' + | '*' + | '#' + | '?' + | '|' + | '-' + | '<' + | '>' + | '=' + | '!' + | '%' + | '@' + | '`' + | '"' + | '\'' + ) + }) + || s.starts_with(' ') + || s.ends_with(' ') + || s == "true" + || s == "false" + || s == "null" + || s == "yes" + || s == "no" + || s.parse::().is_ok(); + + if needs_quoting { + format!("\"{}\"", s.replace('\\', "\\\\").replace('"', "\\\"")) + } else { + s.to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_conversion() { + let yaml = r#" +supertypes: + _expression: + - assignment + - binary + +named: + assignment: + left: _lhs + right: _expression + binary: + left: [_expression, _simple_numeric] + operator: ["!=", "+"] + right: _expression + argument_list: + $children*: [_expression, block_argument] + identifier: + +unnamed: + - "!=" + - "+" + - "end" +"#; + + let json_str = convert(yaml).unwrap(); + let result: Vec = serde_json::from_str(&json_str).unwrap(); + + // Check supertype + let expr = &result[0]; + assert_eq!(expr["type"], "_expression"); + assert_eq!(expr["named"], true); + assert_eq!(expr["subtypes"].as_array().unwrap().len(), 2); + + // Check assignment + let assign = result.iter().find(|n| n["type"] == "assignment").unwrap(); + assert_eq!(assign["fields"]["left"]["required"], true); + assert_eq!(assign["fields"]["left"]["multiple"], false); + assert_eq!(assign["fields"]["left"]["types"][0]["type"], "_lhs"); + assert_eq!(assign["fields"]["left"]["types"][0]["named"], true); + + // Check binary.operator — "!=" and "+" should resolve to unnamed + let binary = result.iter().find(|n| n["type"] == "binary").unwrap(); + let op_types = binary["fields"]["operator"]["types"].as_array().unwrap(); + assert_eq!(op_types[0]["type"], "!="); + assert_eq!(op_types[0]["named"], false); + assert_eq!(op_types[1]["type"], "+"); + assert_eq!(op_types[1]["named"], false); + + // Check argument_list has children, not a field + let arg_list = result + .iter() + .find(|n| n["type"] == "argument_list") + .unwrap(); + assert!(arg_list.get("children").is_some()); + assert_eq!(arg_list["children"]["multiple"], true); + assert_eq!(arg_list["children"]["required"], false); + + // Check identifier is a leaf + let ident = result.iter().find(|n| n["type"] == "identifier").unwrap(); + assert_eq!(ident["fields"].as_object().unwrap().len(), 0); + + // Check unnamed tokens + let end = result.iter().find(|n| n["type"] == "end").unwrap(); + assert_eq!(end["named"], false); + } + + #[test] + fn test_explicit_unnamed_disambiguation() { + let yaml = r#" +named: + foo: + field: [{unnamed: bar}] + +unnamed: + - bar +"#; + + let json_str = convert(yaml).unwrap(); + let result: Vec = serde_json::from_str(&json_str).unwrap(); + let foo = result.iter().find(|n| n["type"] == "foo").unwrap(); + assert_eq!(foo["fields"]["field"]["types"][0]["named"], false); + } + + #[test] + fn test_field_suffixes() { + let yaml = r#" +named: + test_node: + required_single: foo + optional_single?: foo + required_multiple+: foo + optional_multiple*: foo +"#; + + let json_str = convert(yaml).unwrap(); + let result: Vec = serde_json::from_str(&json_str).unwrap(); + let node = result.iter().find(|n| n["type"] == "test_node").unwrap(); + let fields = node["fields"].as_object().unwrap(); + + assert_eq!(fields["required_single"]["required"], true); + assert_eq!(fields["required_single"]["multiple"], false); + + assert_eq!(fields["optional_single"]["required"], false); + assert_eq!(fields["optional_single"]["multiple"], false); + + assert_eq!(fields["required_multiple"]["required"], true); + assert_eq!(fields["required_multiple"]["multiple"], true); + + assert_eq!(fields["optional_multiple"]["required"], false); + assert_eq!(fields["optional_multiple"]["multiple"], true); + } + + #[test] + fn test_json_to_yaml() { + let json = r#"[ + {"type": "_expression", "named": true, "subtypes": [ + {"type": "assignment", "named": true}, + {"type": "identifier", "named": true} + ]}, + {"type": "assignment", "named": true, "fields": { + "left": {"multiple": false, "required": true, "types": [ + {"type": "_expression", "named": true} + ]}, + "right": {"multiple": false, "required": false, "types": [ + {"type": "_expression", "named": true} + ]} + }, "children": { + "multiple": true, "required": false, "types": [ + {"type": "identifier", "named": true} + ] + }}, + {"type": "identifier", "named": true, "fields": {}}, + {"type": "=", "named": false}, + {"type": "end", "named": false} + ]"#; + + let yaml = convert_from_json(json).unwrap(); + + // Verify key structures are present + assert!(yaml.contains("supertypes:")); + assert!(yaml.contains("_expression:")); + assert!(yaml.contains("named:")); + assert!(yaml.contains("assignment:")); + assert!(yaml.contains("left:")); + assert!(yaml.contains("right?:")); + assert!(yaml.contains("$children*:")); + assert!(yaml.contains("identifier:")); + assert!(yaml.contains("unnamed:")); + assert!(yaml.contains("\"=\"")); + assert!(yaml.contains("end")); + } + + #[test] + fn test_round_trip() { + let yaml_input = r#" +supertypes: + _expression: + - assignment + - identifier + +named: + assignment: + left: _expression + right?: _expression + $children*: identifier + identifier: + +unnamed: + - "=" + - end +"#; + + // YAML → JSON → YAML + let json = convert(yaml_input).unwrap(); + let yaml_output = convert_from_json(&json).unwrap(); + // YAML → JSON again (should be identical) + let json2 = convert(&yaml_output).unwrap(); + + let v1: serde_json::Value = serde_json::from_str(&json).unwrap(); + let v2: serde_json::Value = serde_json::from_str(&json2).unwrap(); + assert_eq!(v1, v2); + } +} From 4c5548363cd8ce7295c5872d94a6a4f6f261264e Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:13:37 +0000 Subject: [PATCH 05/11] yeast: Add AST dumper for human-readable tree output Produces indented text showing node kinds, named fields, and leaf content. Unnamed tokens are hidden unless inside a named field. Used by tests for readable assertions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- shared/yeast/src/dump.rs | 181 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 shared/yeast/src/dump.rs diff --git a/shared/yeast/src/dump.rs b/shared/yeast/src/dump.rs new file mode 100644 index 00000000000..99ba019cc3e --- /dev/null +++ b/shared/yeast/src/dump.rs @@ -0,0 +1,181 @@ +use std::fmt::Write; + +use crate::{Ast, Node, NodeContent, CHILD_FIELD}; + +/// Options for controlling AST dump output. +pub struct DumpOptions { + /// Whether to include source locations in the output. + pub show_locations: bool, + /// Whether to include source text for leaf nodes. + pub show_content: bool, +} + +impl Default for DumpOptions { + fn default() -> Self { + Self { + show_locations: false, + show_content: true, + } + } +} + +/// Dump a yeast AST as a human-readable indented text format. +/// +/// Output format: +/// ```text +/// program +/// assignment +/// left: +/// left_assignment_list +/// identifier "x" +/// identifier "y" +/// right: +/// call +/// method: +/// identifier "foo" +/// ``` +pub fn dump_ast(ast: &Ast, root: usize, source: &str) -> String { + dump_ast_with_options(ast, root, source, &DumpOptions::default()) +} + +pub fn dump_ast_with_options( + ast: &Ast, + root: usize, + source: &str, + options: &DumpOptions, +) -> String { + let mut out = String::new(); + dump_node(ast, root, source, options, 0, &mut out); + out +} + +fn dump_node( + ast: &Ast, + id: usize, + source: &str, + options: &DumpOptions, + indent: usize, + out: &mut String, +) { + let node = match ast.get_node(id) { + Some(n) => n, + None => return, + }; + + let prefix = " ".repeat(indent); + + // Node kind + write!(out, "{}{}", prefix, node.kind_name()).unwrap(); + + // Location + if options.show_locations { + let start = node.start_position(); + let end = node.end_position(); + write!( + out, + " [{},{}]-[{},{}]", + start.row + 1, + start.column + 1, + end.row + 1, + end.column + 1 + ) + .unwrap(); + } + + // Content for leaf nodes + if options.show_content && node.is_named() && is_leaf(node) { + let content = node_content(node, source); + if !content.is_empty() { + write!(out, " {content:?}").unwrap(); + } + } + + writeln!(out).unwrap(); + + // Named fields first + for (&field_id, children) in &node.fields { + if field_id == CHILD_FIELD { + continue; // Handle unnamed children last + } + let field_name = ast.field_name_for_id(field_id).unwrap_or("?"); + if children.len() == 1 { + write!(out, "{prefix} {field_name}:").unwrap(); + // Inline single child + let child = ast.get_node(children[0]); + if child.is_some_and(is_leaf) { + write!(out, " ").unwrap(); + dump_node_inline(ast, children[0], source, options, out); + } else { + writeln!(out).unwrap(); + dump_node(ast, children[0], source, options, indent + 2, out); + } + } else { + writeln!(out, "{prefix} {field_name}:").unwrap(); + for &child_id in children { + dump_node(ast, child_id, source, options, indent + 2, out); + } + } + } + + // Unnamed children — skip unnamed tokens (keywords, punctuation) + if let Some(children) = node.fields.get(&CHILD_FIELD) { + for &child_id in children { + if let Some(child) = ast.get_node(child_id) { + if child.is_named() { + dump_node(ast, child_id, source, options, indent + 1, out); + } + } + } + } +} + +/// Dump a leaf node inline (no newline prefix, caller provides context). +fn dump_node_inline(ast: &Ast, id: usize, source: &str, options: &DumpOptions, out: &mut String) { + let node = match ast.get_node(id) { + Some(n) => n, + None => return, + }; + + write!(out, "{}", node.kind_name()).unwrap(); + + if options.show_locations { + let start = node.start_position(); + let end = node.end_position(); + write!( + out, + " [{},{}]-[{},{}]", + start.row + 1, + start.column + 1, + end.row + 1, + end.column + 1 + ) + .unwrap(); + } + + if options.show_content && node.is_named() { + let content = node_content(node, source); + if !content.is_empty() { + write!(out, " {content:?}").unwrap(); + } + } + + writeln!(out).unwrap(); +} + +fn is_leaf(node: &Node) -> bool { + node.fields.is_empty() +} + +fn node_content(node: &Node, source: &str) -> String { + match &node.content { + NodeContent::DynamicString(s) if !s.is_empty() => s.clone(), + _ => { + let range = node.byte_range(); + if range.start < source.len() && range.end <= source.len() { + source[range.start..range.end].to_string() + } else { + String::new() + } + } + } +} From 6e580446fd01bd8f0c2a0e921dde69153ca6abe6 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:13:18 +0000 Subject: [PATCH 06/11] yeast: Add yeast test suite 12 tests covering parsing, queries, tree building, desugaring rules, cursor navigation, and the shorthand rule! syntax. Tests use a custom output node-types.yml with named fields for all children (parameter, stmt, index), loaded via schema_from_yaml_with_language. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- shared/yeast/tests/node-types.yml | 63 +++++ shared/yeast/tests/test.rs | 419 ++++++++++++++++++++++++++++++ 2 files changed, 482 insertions(+) create mode 100644 shared/yeast/tests/node-types.yml create mode 100644 shared/yeast/tests/test.rs diff --git a/shared/yeast/tests/node-types.yml b/shared/yeast/tests/node-types.yml new file mode 100644 index 00000000000..978ef147543 --- /dev/null +++ b/shared/yeast/tests/node-types.yml @@ -0,0 +1,63 @@ +# Output node types for yeast test rules. +# Inspired by tree-sitter-ruby, but with all children in named fields +# (no unnamed children). This represents the desugared output schema. + +named: + program: + stmt*: [assignment, call, identifier, for] + + assignment: + left: [identifier, left_assignment_list] + right: [identifier, integer, call, element_reference] + + left_assignment_list: + item*: identifier + + element_reference: + object: identifier + index: [integer, identifier] + + for: + pattern: [identifier, left_assignment_list] + value: in + body: do + + in: + value: [identifier, call] + + do: + stmt*: [assignment, identifier, call] + + call: + receiver: [identifier, call] + method: identifier + arguments?: argument_list + block?: block + + argument_list: + argument*: [identifier, integer, call] + + block: + parameters: block_parameters + body: block_body + + block_parameters: + parameter*: identifier + + block_body: + stmt*: [assignment, identifier, call] + + identifier: + integer: + +unnamed: + - "=" + - "," + - "(" + - ")" + - "for" + - "in" + - "do" + - "end" + - "|" + - "." diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs new file mode 100644 index 00000000000..406c990a8bd --- /dev/null +++ b/shared/yeast/tests/test.rs @@ -0,0 +1,419 @@ +#![cfg(test)] + +use yeast::dump::dump_ast; +use yeast::*; + +const OUTPUT_SCHEMA_YAML: &str = include_str!("node-types.yml"); + +/// Helper: parse Ruby source with no rules, return dump. +fn parse_and_dump(input: &str) -> String { + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let ast = runner.run(input).unwrap(); + dump_ast(&ast, ast.get_root(), input) +} + +/// Helper: parse Ruby source with a custom output schema and rules, return dump. +fn run_and_dump(input: &str, rules: Vec) -> String { + let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into(); + let schema = + yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap(); + let runner = Runner::with_schema(lang, &schema, &rules); + let ast = runner.run(input).unwrap(); + dump_ast(&ast, ast.get_root(), input) +} + +/// Assert that a dump equals the expected string, treating the expected +/// string as an indented multiline literal: leading/trailing blank lines +/// are stripped, and the common leading indentation is removed from every +/// line. This lets test assertions place the first line at the same +/// indentation as the rest of the body. +#[track_caller] +fn assert_dump_eq(actual: &str, expected: &str) { + let min_indent = expected + .lines() + .filter(|l| !l.trim().is_empty()) + .map(|l| l.len() - l.trim_start().len()) + .min() + .unwrap_or(0); + let dedented: String = expected + .lines() + .map(|l| { + if l.len() >= min_indent { + &l[min_indent..] + } else { + l + } + }) + .collect::>() + .join("\n"); + assert_eq!(actual.trim(), dedented.trim()); +} + +// ---- Parsing tests ---- + +#[test] +fn test_parse_assignment() { + let dump = parse_and_dump("x = 1"); + assert_dump_eq( + &dump, + r#" + program + assignment + left: identifier "x" + right: integer "1" + "#, + ); +} + +#[test] +fn test_parse_multiple_assignment() { + let dump = parse_and_dump("x, y = foo()"); + assert_dump_eq( + &dump, + r#" + program + assignment + left: + left_assignment_list + identifier "x" + identifier "y" + right: + call + arguments: + argument_list + method: identifier "foo" + "#, + ); +} + +#[test] +fn test_parse_for_loop() { + let dump = parse_and_dump("for x in list do\n y\nend"); + assert_dump_eq( + &dump, + r#" + program + for + body: + do + identifier "y" + pattern: identifier "x" + value: + in + identifier "list" + "#, + ); +} + +// ---- Query tests ---- + +#[test] +fn test_query_match() { + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let ast = runner.run("x = 1").unwrap(); + + let query = yeast::query!( + (program + child: (assignment + left: (_) @left + right: (_) @right + ) + ) + ); + + let mut captures = yeast::captures::Captures::new(); + let matched = query.do_match(&ast, ast.get_root(), &mut captures).unwrap(); + assert!(matched); + assert!(captures.get_var("left").is_ok()); + assert!(captures.get_var("right").is_ok()); +} + +#[test] +fn test_query_no_match() { + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let ast = runner.run("x = 1").unwrap(); + + let query = yeast::query!( + (program + child: (call + method: (_) @m + ) + ) + ); + + let mut captures = yeast::captures::Captures::new(); + let matched = query.do_match(&ast, ast.get_root(), &mut captures).unwrap(); + assert!(!matched); +} + +#[test] +fn test_query_repeated_capture() { + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let ast = runner.run("x, y, z = 1").unwrap(); + + let query = yeast::query!( + (assignment + left: (left_assignment_list + (identifier)* @names + ) + ) + ); + + // Match against the assignment node (first named child of program) + let mut cursor = AstCursor::new(&ast); + cursor.goto_first_child(); + let assignment_id = cursor.node().id(); + + let mut captures = yeast::captures::Captures::new(); + let matched = query.do_match(&ast, assignment_id, &mut captures).unwrap(); + assert!(matched); + assert_eq!(captures.get_all("names").len(), 3); +} + +// ---- Tree builder tests ---- + +#[test] +fn test_tree_builder() { + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let mut ast = runner.run("x = 1").unwrap(); + let input = "x = 1"; + + let query = yeast::query!( + (program + child: (assignment + left: (_) @left + right: (_) @right + ) + ) + ); + + let mut captures = yeast::captures::Captures::new(); + query.do_match(&ast, ast.get_root(), &mut captures).unwrap(); + + // Swap left and right + let fresh = yeast::tree_builder::FreshScope::new(); + let mut ctx = yeast::build::BuildCtx::new(&mut ast, &captures, &fresh); + let new_id = yeast::tree!(ctx, + (program + child: (assignment + left: {ctx.capture("right")} + right: {ctx.capture("left")} + ) + ) + ); + + let dump = dump_ast(ctx.ast, new_id, input); + assert_dump_eq( + &dump, + r#" + program + assignment + left: integer "1" + right: identifier "x" + "#, + ); +} + +// ---- Rule tests ---- + +// These rules use field names from node-types.yml, which extends the +// tree-sitter-ruby grammar with named fields for nodes that only have +// unnamed children in tree-sitter (e.g. block_body.stmt, block_parameters.parameter). +fn ruby_rules() -> Vec { + let assign_rule = yeast::rule!( + (assignment + left: (left_assignment_list + (identifier)* @left + ) + right: (_) @right + ) + => + (assignment + left: (identifier $tmp) + right: {right} + ) + {..left.iter().enumerate().map(|(i, &lhs)| + yeast::tree!( + (assignment + left: {lhs} + right: (element_reference + object: (identifier $tmp) + index: (integer #{i}) + ) + ) + ) + )} + ); + + let for_rule = yeast::rule!( + (for + pattern: (_) @pat + value: (in (_) @val) + body: (do (_)* @body) + ) + => + (call + receiver: {val} + method: (identifier "each") + block: (block + parameters: (block_parameters + parameter: (identifier $tmp) + ) + body: (block_body + stmt: (assignment + left: {pat} + right: (identifier $tmp) + ) + stmt: {..body} + ) + ) + ) + ); + + vec![assign_rule, for_rule] +} + +#[test] +fn test_desugar_multiple_assignment() { + let dump = run_and_dump("x, y = e", ruby_rules()); + assert_dump_eq( + &dump, + r#" + program + assignment + left: identifier "$tmp-0" + right: identifier "e" + assignment + left: identifier "x" + right: + element_reference + object: identifier "$tmp-0" + index: integer "0" + assignment + left: identifier "y" + right: + element_reference + object: identifier "$tmp-0" + index: integer "1" + "#, + ); +} + +#[test] +fn test_desugar_for_loop() { + let dump = run_and_dump("for x in list do\n y\nend", ruby_rules()); + assert_dump_eq( + &dump, + r#" + program + call + block: + block + body: + block_body + stmt: + assignment + left: identifier "x" + right: identifier "$tmp-0" + identifier "y" + parameters: + block_parameters + parameter: identifier "$tmp-0" + method: identifier "each" + receiver: identifier "list" + "#, + ); +} + +#[test] +fn test_shorthand_rule() { + let rule = yeast::rule!( + (assignment + left: (_) @method + right: (_) @receiver + ) + => call + ); + + let dump = run_and_dump("x = 1", vec![rule]); + assert_dump_eq( + &dump, + r#" + program + call + method: identifier "x" + receiver: integer "1" + "#, + ); +} + +// ---- Cursor tests ---- + +#[test] +fn test_cursor_navigation() { + let runner = Runner::new(tree_sitter_ruby::LANGUAGE.into(), &[]); + let ast = runner.run("x = 1").unwrap(); + let mut cursor = AstCursor::new(&ast); + + // Start at root + assert_eq!(cursor.node().kind(), "program"); + + // Go to first child (assignment) + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "assignment"); + + // No sibling + assert!(!cursor.goto_next_sibling()); + + // Go to first child of assignment + assert!(cursor.goto_first_child()); + assert!(cursor.node().is_named()); + + // Go back up + assert!(cursor.goto_parent()); + assert_eq!(cursor.node().kind(), "assignment"); + + assert!(cursor.goto_parent()); + assert_eq!(cursor.node().kind(), "program"); + + // Can't go further up + assert!(!cursor.goto_parent()); +} + +#[test] +fn test_desugar_for_with_multiple_assignment() { + let dump = run_and_dump("for a, b in list do\n x\nend", ruby_rules()); + assert_dump_eq( + &dump, + r#" + program + call + block: + block + body: + block_body + stmt: + assignment + left: identifier "$tmp-1" + right: identifier "$tmp-0" + assignment + left: identifier "a" + right: + element_reference + object: identifier "$tmp-1" + index: integer "0" + assignment + left: identifier "b" + right: + element_reference + object: identifier "$tmp-1" + index: integer "1" + identifier "x" + parameters: + block_parameters + parameter: identifier "$tmp-0" + method: identifier "each" + receiver: identifier "list" + "#, + ); +} From cc28ff9a4866eb8323c799dc627f07d6e8a5d628 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:13:46 +0000 Subject: [PATCH 07/11] yeast: Add yeast documentation Covers architecture, query language, template language (tree!/trees!/rule!), capture semantics, fresh identifiers, and extractor integration. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- shared/yeast/doc/yeast.md | 329 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 329 insertions(+) create mode 100644 shared/yeast/doc/yeast.md diff --git a/shared/yeast/doc/yeast.md b/shared/yeast/doc/yeast.md new file mode 100644 index 00000000000..d49ff96f11d --- /dev/null +++ b/shared/yeast/doc/yeast.md @@ -0,0 +1,329 @@ +# YEAST — YEAST Elaborates Abstract Syntax Trees + +YEAST is a framework for transforming tree-sitter parse trees before they are +extracted into a CodeQL database. It sits between the tree-sitter parser and +the TRAP extractor, rewriting parts of the AST according to declarative rules. + +## Motivation + +Tree-sitter grammars describe the **concrete syntax** of a language — every +keyword, operator, and punctuation token appears in the parse tree. CodeQL +analyses often prefer a **simplified abstract syntax** where syntactic sugar +has been removed. YEAST bridges this gap by desugaring the tree-sitter output +into a cleaner form before extraction. + +For example, Ruby's `for x in list do ... end` is syntactic sugar for +`list.each { |x| ... }`. A YEAST rule can rewrite the former into the latter +so that CodeQL queries only need to reason about the `.each` form. + +## Architecture + +``` +Source code + │ + ▼ +┌──────────────┐ +│ tree-sitter │ Parse source into a concrete syntax tree +│ parser │ +└──────┬───────┘ + │ tree_sitter::Tree + ▼ +┌──────────────┐ +│ YEAST │ Apply desugaring rules, producing a new AST +│ Runner │ +└──────┬───────┘ + │ yeast::Ast + ▼ +┌──────────────┐ +│ TRAP │ Walk the (possibly rewritten) AST and emit TRAP tuples +│ extractor │ +└──────────────┘ +``` + +The entry point is `extract()` in the shared tree-sitter extractor. When +called with a non-empty `rules` vector, the parsed tree is run through the +YEAST `Runner` before TRAP extraction; with an empty `rules` vector the +tree is extracted unchanged. + +## How desugaring works + +A YEAST `Rule` has two parts: + +1. A **query** that matches nodes in the AST using a tree-sitter-inspired + pattern language. +2. A **transform** that produces replacement nodes from the match captures. + +The `Runner` applies rules by walking the tree top-down. At each node, it +tries each rule in order. If a rule's query matches, the node is replaced by +the transform's output, and the rules are re-applied to the result. If no +rule matches, the node is kept and its children are processed recursively. + +A rule can replace one node with zero nodes (deletion), one node (rewriting), +or multiple nodes (expansion). + +## Query language + +Queries use a syntax inspired by +[tree-sitter queries](https://tree-sitter.github.io/tree-sitter/using-parsers/queries/index.html), +written inside the `yeast::query!()` proc macro. + +### Node patterns + +```rust +// Match any named node +(_) + +// Match a node of a specific kind +(assignment) + +// Match an unnamed token by its text +("end") +``` + +### Fields + +```rust +// Match a node with specific fields +(assignment + left: (identifier) @lhs + right: (_) @rhs +) +``` + +Fields are matched by name. Unmentioned fields are ignored — the pattern +`(assignment left: (_) @x)` matches any `assignment` node regardless of +what's in `right`. + +### Captures + +Captures bind matched nodes to names for use in the transform. A capture +`@name` always follows the pattern it captures: + +```rust +(identifier) @name // capture an identifier node +(_) @value // capture any named node +(identifier)* @items // capture each repeated match +``` + +### Unnamed children + +Patterns that appear after all named fields match unnamed (positional) +children. Named node patterns like `(_)` automatically skip unnamed tokens +(keywords, operators, punctuation), matching tree-sitter semantics: + +```rust +(for + pattern: (_) @pat // named field + value: (in (_) @val) // "in" token is skipped automatically + body: (do (_)* @body) // "do" and "end" tokens skipped +) +``` + +### Repetitions + +```rust +(_)* // zero or more +(_)+ // one or more +(_)? // zero or one +(identifier)* @names // capture each repeated match +``` + +## Template language + +Templates construct new AST nodes using the `tree!` and `trees!` macros. +All children in a template must be in named fields — output AST nodes are +always fully fielded. + +When used inside a `rule!` macro, the context is implicit — no explicit +`BuildCtx` argument is needed. When used standalone, they take a `BuildCtx` +as the first argument: + +```rust +// Inside rule! — implicit context, captures are Rust variables +yeast::rule!( + (assignment left: (_) @left right: (_) @right) + => + (assignment left: {right} right: {left}) +); + +// Standalone — explicit context +let fresh = yeast::tree_builder::FreshScope::new(); +let mut ctx = BuildCtx::new(ast, &captures, &fresh); +let id = yeast::tree!(ctx, + (assignment + left: {ctx.capture("lhs")} + right: {ctx.capture("rhs")} + ) +); +``` + +### `tree!` — build a single node + +`tree!(...)` returns a single node `Id`: + +```rust +yeast::tree!(ctx, + (assignment + left: {ctx.capture("lhs")} + right: {ctx.capture("rhs")} + ) +) +``` + +### `trees!` — build multiple nodes + +`trees!(...)` returns `Vec`: + +```rust +yeast::trees!(ctx, + (assignment left: {tmp} right: {right}) + {..body} +) +``` + +### Literal nodes + +`(kind "text")` creates a leaf node with fixed text content: + +```rust +(identifier "each") // an identifier node whose text is "each" +``` + +### Computed literals + +`(kind #{expr})` creates a leaf node whose content is `expr.to_string()`: + +```rust +(integer #{i}) // an integer node with the value of i +(identifier #{name}) // an identifier from a Rust variable +``` + +### Fresh identifiers + +`(kind $name)` creates a leaf node with an auto-generated unique name. All +occurrences of the same `$name` within one `BuildCtx` share the same value: + +```rust +(block + parameters: (block_parameters + (identifier $tmp) // generates e.g. "$tmp-0" + ) + body: (block_body + (assignment + left: {pat} + right: (identifier $tmp) // same "$tmp-0" value + ) + ) +) +``` + +### Embedded Rust expressions + +`{expr}` embeds a Rust expression that returns a single node `Id`: + +```rust +(assignment + left: {some_node_id} // insert a pre-built node + right: {rhs} // insert a captured value (inside rule!) +) +``` + +`{..expr}` splices a `Vec` (or any iterable of `Id`): + +```rust +yeast::trees!(ctx, + (assignment left: {tmp} right: {right}) + {..extra_nodes} // splice a Vec +) +``` + +Inside `rule!`, captures are Rust variables, so `{name}` inserts a +single capture (`Id`) and `{..name}` splices a repeated capture +(`Vec`). + +## Complete example: for-loop desugaring + +This rule rewrites Ruby's `for pat in val do body end` into +`val.each { |tmp| pat = tmp; body }`: + +```rust +let for_rule = yeast::rule!( + (for + pattern: (_) @pat + value: (in (_) @val) + body: (do (_)* @body) + ) + => + (call + receiver: {val} + method: (identifier "each") + block: (block + parameters: (block_parameters + (identifier $tmp) + ) + body: (block_body + (assignment + left: {pat} + right: (identifier $tmp) + ) + {..body} + ) + ) + ) +); +``` + +Captures from the query (`@pat`, `@val`, `@body`) become Rust variables +automatically: single captures bind as `Id`, repeated captures (after +`*` or `+`) as `Vec`, and optional captures (after `?`) as +`Option`. + +## The `rule!` macro + +`rule!` combines a query and a transform into a single declaration: + +```rust +// Full template form +yeast::rule!( + (query_pattern field: (_) @capture) + => + (output_template field: {capture}) +) + +// Shorthand form — captures become fields on the output node +yeast::rule!( + (query_pattern field: (_) @capture) + => output_kind +) +``` + +The shorthand `=> kind` form auto-generates the template, mapping each +capture name to a field of the same name on the output node. + +## Integration with the extractor + +A YEAST desugaring pass is configured with a [`DesugaringConfig`], which +carries the rules and an optional output node-types schema (in YAML +format). Attach it to a language spec to enable rewriting: + +```rust +let desugar = yeast::DesugaringConfig::new(my_rules) + .with_output_node_types_yaml(include_str!("output-node-types.yml")); + +let lang = simple::LanguageSpec { + prefix: "ruby", + ts_language: tree_sitter_ruby::LANGUAGE.into(), + node_types: tree_sitter_ruby::NODE_TYPES, + desugar: Some(desugar), + file_globs: vec!["*.rb".into()], +}; +``` + +The same YAML node-types is used for both the runtime yeast `Schema` (so +rules can refer to output-only kinds and fields) and TRAP validation (it +is converted to JSON internally). + +For the dbscheme/QL code generator, set `Language::desugar` to a +`DesugaringConfig` carrying the same YAML; the generator converts it to +JSON for downstream code generation. The `rules` field of the config is +unused at code-generation time. From 9ad431dea1fd87f956868565e61afbc621af3ee7 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:13:58 +0000 Subject: [PATCH 08/11] yeast: Integrate yeast with shared tree-sitter extractor extract() gains a rules parameter. When empty, uses tree-sitter native traversal (no behavior change). When non-empty, runs yeast desugaring and extracts via traverse_yeast. Adds AstNode trait abstracting over tree_sitter::Node and yeast::Node, with minimal changes to existing Visitor methods (Node -> &N in 6 signatures). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ql/extractor/src/extractor.rs | 4 + ruby/extractor/src/extractor.rs | 2 + shared/tree-sitter-extractor/Cargo.toml | 1 + .../src/extractor/mod.rs | 151 ++++++++++++++++-- .../src/extractor/simple.rs | 35 +++- .../tests/integration_test.rs | 1 + .../tests/multiple_languages.rs | 2 + 7 files changed, 178 insertions(+), 18 deletions(-) diff --git a/ql/extractor/src/extractor.rs b/ql/extractor/src/extractor.rs index 8383d8424ee..66096442e85 100644 --- a/ql/extractor/src/extractor.rs +++ b/ql/extractor/src/extractor.rs @@ -29,24 +29,28 @@ pub fn run(options: Options) -> std::io::Result<()> { prefix: "ql", ts_language: tree_sitter_ql::LANGUAGE.into(), node_types: tree_sitter_ql::NODE_TYPES, + desugar: None, file_globs: vec!["*.ql".into(), "*.qll".into()], }, simple::LanguageSpec { prefix: "dbscheme", ts_language: tree_sitter_ql_dbscheme::LANGUAGE.into(), node_types: tree_sitter_ql_dbscheme::NODE_TYPES, + desugar: None, file_globs: vec!["*.dbscheme".into()], }, simple::LanguageSpec { prefix: "json", ts_language: tree_sitter_json::LANGUAGE.into(), node_types: tree_sitter_json::NODE_TYPES, + desugar: None, file_globs: vec!["*.json".into(), "*.jsonl".into(), "*.jsonc".into()], }, simple::LanguageSpec { prefix: "blame", ts_language: tree_sitter_blame::LANGUAGE.into(), node_types: tree_sitter_blame::NODE_TYPES, + desugar: None, file_globs: vec!["*.blame".into()], }, ], diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 6807d09e9be..4849f473ccb 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -123,6 +123,7 @@ pub fn run(options: Options) -> std::io::Result<()> { &path, &source, &[], + None, ); let (ranges, line_breaks) = scan_erb( @@ -211,6 +212,7 @@ pub fn run(options: Options) -> std::io::Result<()> { &path, &source, &code_ranges, + None, ); std::fs::create_dir_all(src_archive_file.parent().unwrap())?; if needs_conversion { diff --git a/shared/tree-sitter-extractor/Cargo.toml b/shared/tree-sitter-extractor/Cargo.toml index d02f02fd588..1ad18a6df5a 100644 --- a/shared/tree-sitter-extractor/Cargo.toml +++ b/shared/tree-sitter-extractor/Cargo.toml @@ -20,6 +20,7 @@ serde_json = "1.0" chrono = { version = "0.4.42", features = ["serde"] } num_cpus = "1.17.0" zstd = "0.13.3" +yeast = { path = "../yeast" } [dev-dependencies] tree-sitter-ql = "0.23.1" diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index 0ace3831881..0c3e1366081 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -18,6 +18,82 @@ use tree_sitter::{Language, Node, Parser, Range, Tree}; pub mod simple; +/// Trait abstracting over tree-sitter and yeast node types for extraction. +trait AstNode { + fn kind(&self) -> &str; + fn is_named(&self) -> bool; + fn is_missing(&self) -> bool; + fn is_error(&self) -> bool; + fn is_extra(&self) -> bool; + fn start_position(&self) -> tree_sitter::Point; + fn end_position(&self) -> tree_sitter::Point; + fn byte_range(&self) -> std::ops::Range; + fn end_byte(&self) -> usize { + self.byte_range().end + } + /// For yeast nodes with synthetic content, return it. Otherwise None. + fn opt_string_content(&self) -> Option { + None + } +} + +impl<'a> AstNode for Node<'a> { + fn kind(&self) -> &str { + Node::kind(self) + } + fn is_named(&self) -> bool { + Node::is_named(self) + } + fn is_missing(&self) -> bool { + Node::is_missing(self) + } + fn is_error(&self) -> bool { + Node::is_error(self) + } + fn is_extra(&self) -> bool { + Node::is_extra(self) + } + fn start_position(&self) -> tree_sitter::Point { + Node::start_position(self) + } + fn end_position(&self) -> tree_sitter::Point { + Node::end_position(self) + } + fn byte_range(&self) -> std::ops::Range { + Node::byte_range(self) + } +} + +impl AstNode for yeast::Node { + fn kind(&self) -> &str { + yeast::Node::kind(self) + } + fn is_named(&self) -> bool { + yeast::Node::is_named(self) + } + fn is_missing(&self) -> bool { + yeast::Node::is_missing(self) + } + fn is_error(&self) -> bool { + yeast::Node::is_error(self) + } + fn is_extra(&self) -> bool { + yeast::Node::is_extra(self) + } + fn start_position(&self) -> tree_sitter::Point { + yeast::Node::start_position(self) + } + fn end_position(&self) -> tree_sitter::Point { + yeast::Node::end_position(self) + } + fn byte_range(&self) -> std::ops::Range { + yeast::Node::byte_range(self) + } + fn opt_string_content(&self) -> Option { + yeast::Node::opt_string_content(self) + } +} + /// Sets the tracing level based on the environment variables /// `RUST_LOG` and `CODEQL_VERBOSITY` (prioritized in that order), /// falling back to `warn` if neither is set. @@ -204,6 +280,11 @@ pub fn location_label(writer: &mut trap::Writer, location: trap::Location) -> tr } /// Extracts the source file at `path`, which is assumed to be canonicalized. +/// When `yeast_runner` is `Some`, the parsed tree is first transformed +/// through the supplied yeast `Runner` before TRAP extraction. Building the +/// `Runner` (which parses YAML and constructs the schema) is the caller's +/// responsibility, allowing it to be done once and shared across files. +#[allow(clippy::too_many_arguments)] pub fn extract( language: &Language, language_prefix: &str, @@ -214,6 +295,7 @@ pub fn extract( path: &Path, source: &[u8], ranges: &[Range], + yeast_runner: Option<&yeast::Runner<'_>>, ) { let path_str = file_paths::normalize_and_transform_path(path, transformer); let span = tracing::span!( @@ -236,13 +318,20 @@ pub fn extract( source, diagnostics_writer, trap_writer, - // TODO: should we handle path strings that are not valid UTF8 better? &path_str, file_label, language_prefix, schema, ); - traverse(&tree, &mut visitor); + + if let Some(yeast_runner) = yeast_runner { + let ast = yeast_runner + .run_from_tree(&tree) + .unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}")); + traverse_yeast(&ast, &mut visitor); + } else { + traverse(&tree, &mut visitor); + } parser.reset(); } @@ -329,11 +418,11 @@ impl<'a> Visitor<'a> { ); } - fn record_parse_error_for_node( + fn record_parse_error_for_node( &mut self, message: &str, args: &[diagnostics::MessageArg], - node: Node, + node: &N, status_page: bool, ) { let loc = location_for(self, self.file_label, node); @@ -357,7 +446,7 @@ impl<'a> Visitor<'a> { self.record_parse_error(loc_label, &mesg); } - fn enter_node(&mut self, node: Node) -> bool { + fn enter_node(&mut self, node: &N) -> bool { if node.is_missing() { self.record_parse_error_for_node( "A parse error occurred (expected {} symbol). Check the syntax of the file. If the file is invalid, correct the error or {} the file from analysis.", @@ -383,7 +472,7 @@ impl<'a> Visitor<'a> { true } - fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) { + fn leave_node(&mut self, field_name: Option<&'static str>, node: &N) { if node.is_error() || node.is_missing() { return; } @@ -434,7 +523,7 @@ impl<'a> Visitor<'a> { fields, name: table_name, } => { - if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) { + if let Some(args) = self.complex_node(node, fields, &child_nodes, id) { self.trap_writer.add_tuple( &self.ast_node_location_table_name, vec![trap::Arg::Label(id), trap::Arg::Label(loc_label)], @@ -495,9 +584,9 @@ impl<'a> Visitor<'a> { } } - fn complex_node( + fn complex_node( &mut self, - node: &Node, + node: &N, fields: &[Field], child_nodes: &[ChildNode], parent_id: trap::Label, @@ -529,7 +618,7 @@ impl<'a> Visitor<'a> { diagnostics::MessageArg::Code(&format!("{:?}", child_node.type_name)), diagnostics::MessageArg::Code(&format!("{:?}", field.type_info)), ], - *node, + node, false, ); } @@ -541,7 +630,7 @@ impl<'a> Visitor<'a> { diagnostics::MessageArg::Code(child_node.field_name.unwrap_or("child")), diagnostics::MessageArg::Code(&format!("{:?}", child_node.type_name)), ], - *node, + node, false, ); } @@ -566,7 +655,7 @@ impl<'a> Visitor<'a> { node.kind(), column_name ); - self.record_parse_error_for_node(&error_message, &[], *node, false); + self.record_parse_error_for_node(&error_message, &[], node, false); } } Storage::Table { @@ -582,7 +671,7 @@ impl<'a> Visitor<'a> { diagnostics::MessageArg::Code(node.kind()), diagnostics::MessageArg::Code(table_name), ], - *node, + node, false, ); break; @@ -639,15 +728,21 @@ impl<'a> Visitor<'a> { } // Emit a slice of a source file as an Arg. -fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg { - let range = n.byte_range(); - trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned()) +fn sliced_source_arg(source: &[u8], n: &N) -> trap::Arg { + trap::Arg::String(n.opt_string_content().unwrap_or_else(|| { + let range = n.byte_range(); + String::from_utf8_lossy(&source[range.start..range.end]).into_owned() + })) } // Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated. // The first is the location and label definition, and the second is the // 'Located' entry. -fn location_for(visitor: &mut Visitor, file_label: trap::Label, n: Node) -> trap::Location { +fn location_for( + visitor: &mut Visitor, + file_label: trap::Label, + n: &N, +) -> trap::Location { // Tree-sitter row, column values are 0-based while CodeQL starts // counting at 1. In addition Tree-sitter's row and column for the // end position are exclusive while CodeQL's end positions are inclusive. @@ -715,6 +810,28 @@ fn location_for(visitor: &mut Visitor, file_label: trap::Label, n: Node) -> trap fn traverse(tree: &Tree, visitor: &mut Visitor) { let cursor = &mut tree.walk(); + visitor.enter_node(&cursor.node()); + let mut recurse = true; + loop { + if recurse && cursor.goto_first_child() { + recurse = visitor.enter_node(&cursor.node()); + } else { + visitor.leave_node(cursor.field_name(), &cursor.node()); + + if cursor.goto_next_sibling() { + recurse = visitor.enter_node(&cursor.node()); + } else if cursor.goto_parent() { + recurse = false; + } else { + break; + } + } + } +} + +fn traverse_yeast(tree: &yeast::Ast, visitor: &mut Visitor) { + use yeast::Cursor; + let mut cursor = tree.walk(); visitor.enter_node(cursor.node()); let mut recurse = true; loop { diff --git a/shared/tree-sitter-extractor/src/extractor/simple.rs b/shared/tree-sitter-extractor/src/extractor/simple.rs index b8446d02f89..6fcd29b0344 100644 --- a/shared/tree-sitter-extractor/src/extractor/simple.rs +++ b/shared/tree-sitter-extractor/src/extractor/simple.rs @@ -7,11 +7,17 @@ use std::path::{Path, PathBuf}; use crate::diagnostics; use crate::node_types; +use yeast; pub struct LanguageSpec { pub prefix: &'static str, pub ts_language: tree_sitter::Language, pub node_types: &'static str, + /// Optional yeast desugaring configuration. When set, the parsed + /// tree is rewritten through yeast before TRAP extraction. The + /// config's `output_node_types_yaml` (if set) provides the schema + /// used both at runtime (for the rewriter) and for TRAP validation. + pub desugar: Option, pub file_globs: Vec, } @@ -85,9 +91,35 @@ impl Extractor { .collect(); let mut schemas = vec![]; + let mut yeast_runners = Vec::new(); for lang in &self.languages { - let schema = node_types::read_node_types_str(lang.prefix, lang.node_types)?; + let effective_node_types: String = + match lang.desugar.as_ref().and_then(|c| c.output_node_types_yaml) { + Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { + std::io::Error::other(format!( + "Failed to convert YAML node-types to JSON for {}: {e}", + lang.prefix + )) + })?, + None => lang.node_types.to_string(), + }; + let schema = node_types::read_node_types_str(lang.prefix, &effective_node_types)?; schemas.push(schema); + + // Build the yeast runner once per language so the YAML schema + // isn't re-parsed for every file. + let yeast_runner = lang + .desugar + .as_ref() + .map(|config| yeast::Runner::from_config(lang.ts_language.clone(), config)) + .transpose() + .map_err(|e| { + std::io::Error::other(format!( + "Failed to build desugaring runner for {}: {e}", + lang.prefix + )) + })?; + yeast_runners.push(yeast_runner); } // Construct a single globset containing all language globs, @@ -162,6 +194,7 @@ impl Extractor { &path, &source, &[], + yeast_runners[i].as_ref(), ); std::fs::create_dir_all(src_archive_file.parent().unwrap())?; std::fs::copy(&path, &src_archive_file)?; diff --git a/shared/tree-sitter-extractor/tests/integration_test.rs b/shared/tree-sitter-extractor/tests/integration_test.rs index 2b243ff7945..694eb526f39 100644 --- a/shared/tree-sitter-extractor/tests/integration_test.rs +++ b/shared/tree-sitter-extractor/tests/integration_test.rs @@ -13,6 +13,7 @@ fn simple_extractor() { prefix: "ql", ts_language: tree_sitter_ql::LANGUAGE.into(), node_types: tree_sitter_ql::NODE_TYPES, + desugar: None, file_globs: vec!["*.qll".into()], }; diff --git a/shared/tree-sitter-extractor/tests/multiple_languages.rs b/shared/tree-sitter-extractor/tests/multiple_languages.rs index 2e45e56754a..e345eec5828 100644 --- a/shared/tree-sitter-extractor/tests/multiple_languages.rs +++ b/shared/tree-sitter-extractor/tests/multiple_languages.rs @@ -13,12 +13,14 @@ fn multiple_language_extractor() { prefix: "ql", ts_language: tree_sitter_ql::LANGUAGE.into(), node_types: tree_sitter_ql::NODE_TYPES, + desugar: None, file_globs: vec!["*.qll".into()], }; let lang_json = simple::LanguageSpec { prefix: "json", ts_language: tree_sitter_json::LANGUAGE.into(), node_types: tree_sitter_json::NODE_TYPES, + desugar: None, file_globs: vec!["*.json".into(), "*Jsonfile".into()], }; From 82bbdee83234d082d1fd181b131f282e2b2794ba Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 4 May 2026 13:27:00 +0000 Subject: [PATCH 09/11] yeast: Support separate output node types in extractor generator Language and LanguageSpec gain optional output_node_types field. When set, the generator produces dbscheme/QL from the output types and the extractor validates TRAP against them. All existing extractors pass None (no behavior change). Ruby extract() calls gain vec![] for the new rules parameter. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ql/extractor/src/generator.rs | 4 ++++ ruby/extractor/src/generator.rs | 2 ++ .../src/generator/language.rs | 5 +++++ .../tree-sitter-extractor/src/generator/mod.rs | 16 +++++++++++++++- 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ql/extractor/src/generator.rs b/ql/extractor/src/generator.rs index 650e11c138b..96ce5319dd1 100644 --- a/ql/extractor/src/generator.rs +++ b/ql/extractor/src/generator.rs @@ -21,18 +21,22 @@ pub fn run(options: Options) -> std::io::Result<()> { Language { name: "QL".to_owned(), node_types: tree_sitter_ql::NODE_TYPES, + desugar: None, }, Language { name: "Dbscheme".to_owned(), node_types: tree_sitter_ql_dbscheme::NODE_TYPES, + desugar: None, }, Language { name: "Blame".to_owned(), node_types: tree_sitter_blame::NODE_TYPES, + desugar: None, }, Language { name: "JSON".to_owned(), node_types: tree_sitter_json::NODE_TYPES, + desugar: None, }, ]; diff --git a/ruby/extractor/src/generator.rs b/ruby/extractor/src/generator.rs index de1d0dbfd7e..0430afd103e 100644 --- a/ruby/extractor/src/generator.rs +++ b/ruby/extractor/src/generator.rs @@ -21,10 +21,12 @@ pub fn run(options: Options) -> std::io::Result<()> { Language { name: "Ruby".to_owned(), node_types: tree_sitter_ruby::NODE_TYPES, + desugar: None, }, Language { name: "Erb".to_owned(), node_types: tree_sitter_embedded_template::NODE_TYPES, + desugar: None, }, ]; diff --git a/shared/tree-sitter-extractor/src/generator/language.rs b/shared/tree-sitter-extractor/src/generator/language.rs index f0b0ed1790f..a95f750b572 100644 --- a/shared/tree-sitter-extractor/src/generator/language.rs +++ b/shared/tree-sitter-extractor/src/generator/language.rs @@ -1,4 +1,9 @@ pub struct Language { pub name: String, pub node_types: &'static str, + /// Optional yeast desugaring configuration. When set with an + /// `output_node_types_yaml`, the generator uses that YAML for the + /// dbscheme/QL library instead of `node_types`. The `rules` field is + /// unused at code-generation time; only the schema matters. + pub desugar: Option, } diff --git a/shared/tree-sitter-extractor/src/generator/mod.rs b/shared/tree-sitter-extractor/src/generator/mod.rs index 78e9e4a0b69..d2521c51b3e 100644 --- a/shared/tree-sitter-extractor/src/generator/mod.rs +++ b/shared/tree-sitter-extractor/src/generator/mod.rs @@ -6,6 +6,7 @@ use std::io::Write; use std::path::PathBuf; use crate::node_types; +use yeast; pub mod dbscheme; pub mod language; @@ -68,7 +69,20 @@ pub fn generate( let token_name = format!("{}_token", &prefix); let tokeninfo_name = format!("{}_tokeninfo", &prefix); let reserved_word_name = format!("{}_reserved_word", &prefix); - let nodes = node_types::read_node_types_str(&prefix, language.node_types)?; + let effective_node_types: String = match language + .desugar + .as_ref() + .and_then(|c| c.output_node_types_yaml) + { + Some(yaml) => yeast::node_types_yaml::convert(yaml).map_err(|e| { + std::io::Error::other(format!( + "Failed to convert YAML node-types to JSON for {}: {e}", + language.name + )) + })?, + None => language.node_types.to_string(), + }; + let nodes = node_types::read_node_types_str(&prefix, &effective_node_types)?; let (dbscheme_entries, mut ast_node_members, token_kinds) = convert_nodes(&nodes); ast_node_members.insert(&token_name); writeln!(&mut dbscheme_writer, "/*- {} dbscheme -*/", language.name)?; From 60dcf88b50305b8e4afc4f00bde1ec7caeb03fce Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 5 May 2026 12:26:57 +0000 Subject: [PATCH 10/11] yeast: Add Bazel build rules for yeast crates Add BUILD.bazel files for the yeast and yeast-macros crates, register them as dependencies of the shared tree-sitter extractor, and refresh the vendored crate dependencies via update_tree_sitter_extractors_deps.sh. --- Cargo.lock | 14 +- MODULE.bazel | 4 +- .../tree_sitter_extractors_deps/BUILD.bazel | 30 +++- ....cc-1.2.37.bazel => BUILD.cc-1.2.61.bazel} | 4 +- ...azel => BUILD.find-msvc-tools-0.1.9.bazel} | 2 +- .../BUILD.iana-time-zone-haiku-0.1.2.bazel | 2 +- ...9.bazel => BUILD.tree-sitter-0.26.8.bazel} | 8 +- ...tree-sitter-embedded-template-0.25.0.bazel | 2 +- .../BUILD.tree-sitter-json-0.24.8.bazel | 2 +- .../BUILD.tree-sitter-python-0.23.6.bazel | 166 ++++++++++++++++++ .../BUILD.tree-sitter-ql-0.23.1.bazel | 2 +- .../BUILD.tree-sitter-ruby-0.23.1.bazel | 2 +- .../BUILD.zstd-sys-2.0.16+zstd.1.5.7.bazel | 2 +- .../tree_sitter_extractors_deps/defs.bzl | 114 ++++++++++-- shared/tree-sitter-extractor/BUILD.bazel | 4 +- shared/yeast-macros/BUILD.bazel | 12 ++ shared/yeast/BUILD.bazel | 18 ++ 17 files changed, 346 insertions(+), 42 deletions(-) rename misc/bazel/3rdparty/tree_sitter_extractors_deps/{BUILD.cc-1.2.37.bazel => BUILD.cc-1.2.61.bazel} (98%) rename misc/bazel/3rdparty/tree_sitter_extractors_deps/{BUILD.find-msvc-tools-0.1.1.bazel => BUILD.find-msvc-tools-0.1.9.bazel} (99%) rename misc/bazel/3rdparty/tree_sitter_extractors_deps/{BUILD.tree-sitter-0.25.9.bazel => BUILD.tree-sitter-0.26.8.bazel} (97%) create mode 100644 misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-python-0.23.6.bazel create mode 100644 shared/yeast-macros/BUILD.bazel create mode 100644 shared/yeast/BUILD.bazel diff --git a/Cargo.lock b/Cargo.lock index 38a3c806fb2..12be6b1cc43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -240,9 +240,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.37" +version = "1.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65193589c6404eb80b450d618eaf9a2cafaaafd57ecce47370519ef674a7bd44" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" dependencies = [ "find-msvc-tools", "jobserver", @@ -755,9 +755,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.1" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -2471,6 +2471,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ + "indexmap 2.11.4", "itoa", "memchr", "ryu", @@ -2853,13 +2854,14 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.24.7" +version = "0.26.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +checksum = "887bd495d0582c5e3e0d8ece2233666169fa56a9644d172fc22ad179ab2d0538" dependencies = [ "cc", "regex", "regex-syntax", + "serde_json", "streaming-iterator", "tree-sitter-language", ] diff --git a/MODULE.bazel b/MODULE.bazel index 16b4a4691f8..e7474e9a393 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -141,14 +141,16 @@ use_repo( "vendor_ts__serde-1.0.228", "vendor_ts__serde_json-1.0.145", "vendor_ts__serde_with-3.14.1", + "vendor_ts__serde_yaml-0.9.34-deprecated", "vendor_ts__syn-2.0.106", "vendor_ts__toml-0.9.7", "vendor_ts__tracing-0.1.41", "vendor_ts__tracing-flame-0.2.0", "vendor_ts__tracing-subscriber-0.3.20", - "vendor_ts__tree-sitter-0.25.9", + "vendor_ts__tree-sitter-0.26.8", "vendor_ts__tree-sitter-embedded-template-0.25.0", "vendor_ts__tree-sitter-json-0.24.8", + "vendor_ts__tree-sitter-python-0.23.6", "vendor_ts__tree-sitter-ql-0.23.1", "vendor_ts__tree-sitter-ruby-0.23.1", "vendor_ts__triomphe-0.1.14", diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel index a5cfeccdcea..27d36c221ea 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.bazel @@ -529,6 +529,18 @@ alias( tags = ["manual"], ) +alias( + name = "serde_yaml-0.9.34+deprecated", + actual = "@vendor_ts__serde_yaml-0.9.34-deprecated//:serde_yaml", + tags = ["manual"], +) + +alias( + name = "serde_yaml", + actual = "@vendor_ts__serde_yaml-0.9.34-deprecated//:serde_yaml", + tags = ["manual"], +) + alias( name = "syn-2.0.106", actual = "@vendor_ts__syn-2.0.106//:syn", @@ -590,14 +602,14 @@ alias( ) alias( - name = "tree-sitter-0.25.9", - actual = "@vendor_ts__tree-sitter-0.25.9//:tree_sitter", + name = "tree-sitter-0.26.8", + actual = "@vendor_ts__tree-sitter-0.26.8//:tree_sitter", tags = ["manual"], ) alias( name = "tree-sitter", - actual = "@vendor_ts__tree-sitter-0.25.9//:tree_sitter", + actual = "@vendor_ts__tree-sitter-0.26.8//:tree_sitter", tags = ["manual"], ) @@ -625,6 +637,18 @@ alias( tags = ["manual"], ) +alias( + name = "tree-sitter-python-0.23.6", + actual = "@vendor_ts__tree-sitter-python-0.23.6//:tree_sitter_python", + tags = ["manual"], +) + +alias( + name = "tree-sitter-python", + actual = "@vendor_ts__tree-sitter-python-0.23.6//:tree_sitter_python", + tags = ["manual"], +) + alias( name = "tree-sitter-ql-0.23.1", actual = "@vendor_ts__tree-sitter-ql-0.23.1//:tree_sitter_ql", diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cc-1.2.37.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cc-1.2.61.bazel similarity index 98% rename from misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cc-1.2.37.bazel rename to misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cc-1.2.61.bazel index c747c1c3c4f..4071cd59243 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cc-1.2.37.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.cc-1.2.61.bazel @@ -96,9 +96,9 @@ rust_library( "@rules_rust//rust/platform:x86_64-unknown-uefi": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "1.2.37", + version = "1.2.61", deps = [ - "@vendor_ts__find-msvc-tools-0.1.1//:find_msvc_tools", + "@vendor_ts__find-msvc-tools-0.1.9//:find_msvc_tools", "@vendor_ts__jobserver-0.1.34//:jobserver", "@vendor_ts__shlex-1.3.0//:shlex", ] + select({ diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.find-msvc-tools-0.1.1.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.find-msvc-tools-0.1.9.bazel similarity index 99% rename from misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.find-msvc-tools-0.1.1.bazel rename to misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.find-msvc-tools-0.1.9.bazel index 8fc8c9a81e2..12cd2ecbe0c 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.find-msvc-tools-0.1.1.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.find-msvc-tools-0.1.9.bazel @@ -93,5 +93,5 @@ rust_library( "@rules_rust//rust/platform:x86_64-unknown-uefi": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "0.1.1", + version = "0.1.9", ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.iana-time-zone-haiku-0.1.2.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.iana-time-zone-haiku-0.1.2.bazel index fe120c04689..582c31e06fb 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.iana-time-zone-haiku-0.1.2.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.iana-time-zone-haiku-0.1.2.bazel @@ -154,7 +154,7 @@ cargo_build_script( version = "0.1.2", visibility = ["//visibility:private"], deps = [ - "@vendor_ts__cc-1.2.37//:cc", + "@vendor_ts__cc-1.2.61//:cc", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-0.25.9.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-0.26.8.bazel similarity index 97% rename from misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-0.25.9.bazel rename to misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-0.26.8.bazel index fd97779c54c..98cdfaa4d2f 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-0.25.9.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-0.26.8.bazel @@ -101,12 +101,12 @@ rust_library( "@rules_rust//rust/platform:x86_64-unknown-uefi": [], "//conditions:default": ["@platforms//:incompatible"], }), - version = "0.25.9", + version = "0.26.8", deps = [ "@vendor_ts__regex-1.11.3//:regex", "@vendor_ts__regex-syntax-0.8.6//:regex_syntax", "@vendor_ts__streaming-iterator-0.1.9//:streaming_iterator", - "@vendor_ts__tree-sitter-0.25.9//:build_script_build", + "@vendor_ts__tree-sitter-0.26.8//:build_script_build", "@vendor_ts__tree-sitter-language-0.1.5//:tree_sitter_language", ], ) @@ -164,10 +164,10 @@ cargo_build_script( "noclippy", "norustfmt", ], - version = "0.25.9", + version = "0.26.8", visibility = ["//visibility:private"], deps = [ - "@vendor_ts__cc-1.2.37//:cc", + "@vendor_ts__cc-1.2.61//:cc", "@vendor_ts__serde_json-1.0.145//:serde_json", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-embedded-template-0.25.0.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-embedded-template-0.25.0.bazel index c9dd60b03c0..57cf6cd860b 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-embedded-template-0.25.0.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-embedded-template-0.25.0.bazel @@ -155,7 +155,7 @@ cargo_build_script( version = "0.25.0", visibility = ["//visibility:private"], deps = [ - "@vendor_ts__cc-1.2.37//:cc", + "@vendor_ts__cc-1.2.61//:cc", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-json-0.24.8.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-json-0.24.8.bazel index 6b9a7bf0582..245539d02e7 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-json-0.24.8.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-json-0.24.8.bazel @@ -155,7 +155,7 @@ cargo_build_script( version = "0.24.8", visibility = ["//visibility:private"], deps = [ - "@vendor_ts__cc-1.2.37//:cc", + "@vendor_ts__cc-1.2.61//:cc", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-python-0.23.6.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-python-0.23.6.bazel new file mode 100644 index 00000000000..fc7a098193a --- /dev/null +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-python-0.23.6.bazel @@ -0,0 +1,166 @@ +############################################################################### +# @generated +# DO NOT MODIFY: This file is auto-generated by a crate_universe tool. To +# regenerate this file, run the following: +# +# bazel run @@//misc/bazel/3rdparty:vendor_tree_sitter_extractors +############################################################################### + +load( + "@rules_rust//cargo:defs.bzl", + "cargo_build_script", + "cargo_toml_env_vars", +) +load("@rules_rust//rust:defs.bzl", "rust_library") + +package(default_visibility = ["//visibility:public"]) + +cargo_toml_env_vars( + name = "cargo_toml_env_vars", + src = "Cargo.toml", +) + +rust_library( + name = "tree_sitter_python", + srcs = glob( + include = ["**/*.rs"], + allow_empty = True, + ), + compile_data = glob( + include = ["**"], + allow_empty = True, + exclude = [ + "**/* *", + ".tmp_git_root/**/*", + "BUILD", + "BUILD.bazel", + "WORKSPACE", + "WORKSPACE.bazel", + ], + ), + crate_root = "bindings/rust/lib.rs", + edition = "2021", + rustc_env_files = [ + ":cargo_toml_env_vars", + ], + rustc_flags = [ + "--cap-lints=allow", + ], + tags = [ + "cargo-bazel", + "crate-name=tree-sitter-python", + "manual", + "noclippy", + "norustfmt", + ], + target_compatible_with = select({ + "@rules_rust//rust/platform:aarch64-apple-darwin": [], + "@rules_rust//rust/platform:aarch64-apple-ios": [], + "@rules_rust//rust/platform:aarch64-apple-ios-sim": [], + "@rules_rust//rust/platform:aarch64-linux-android": [], + "@rules_rust//rust/platform:aarch64-pc-windows-msvc": [], + "@rules_rust//rust/platform:aarch64-unknown-fuchsia": [], + "@rules_rust//rust/platform:aarch64-unknown-linux-gnu": [], + "@rules_rust//rust/platform:aarch64-unknown-nixos-gnu": [], + "@rules_rust//rust/platform:aarch64-unknown-nto-qnx710": [], + "@rules_rust//rust/platform:aarch64-unknown-uefi": [], + "@rules_rust//rust/platform:arm-unknown-linux-gnueabi": [], + "@rules_rust//rust/platform:arm-unknown-linux-musleabi": [], + "@rules_rust//rust/platform:armv7-linux-androideabi": [], + "@rules_rust//rust/platform:armv7-unknown-linux-gnueabi": [], + "@rules_rust//rust/platform:i686-apple-darwin": [], + "@rules_rust//rust/platform:i686-linux-android": [], + "@rules_rust//rust/platform:i686-pc-windows-msvc": [], + "@rules_rust//rust/platform:i686-unknown-freebsd": [], + "@rules_rust//rust/platform:i686-unknown-linux-gnu": [], + "@rules_rust//rust/platform:powerpc-unknown-linux-gnu": [], + "@rules_rust//rust/platform:riscv32imc-unknown-none-elf": [], + "@rules_rust//rust/platform:riscv64gc-unknown-linux-gnu": [], + "@rules_rust//rust/platform:riscv64gc-unknown-none-elf": [], + "@rules_rust//rust/platform:s390x-unknown-linux-gnu": [], + "@rules_rust//rust/platform:thumbv7em-none-eabi": [], + "@rules_rust//rust/platform:thumbv8m.main-none-eabi": [], + "@rules_rust//rust/platform:wasm32-unknown-emscripten": [], + "@rules_rust//rust/platform:wasm32-unknown-unknown": [], + "@rules_rust//rust/platform:wasm32-wasip1": [], + "@rules_rust//rust/platform:wasm32-wasip1-threads": [], + "@rules_rust//rust/platform:wasm32-wasip2": [], + "@rules_rust//rust/platform:x86_64-apple-darwin": [], + "@rules_rust//rust/platform:x86_64-apple-ios": [], + "@rules_rust//rust/platform:x86_64-linux-android": [], + "@rules_rust//rust/platform:x86_64-pc-windows-msvc": [], + "@rules_rust//rust/platform:x86_64-unknown-freebsd": [], + "@rules_rust//rust/platform:x86_64-unknown-fuchsia": [], + "@rules_rust//rust/platform:x86_64-unknown-linux-gnu": [], + "@rules_rust//rust/platform:x86_64-unknown-nixos-gnu": [], + "@rules_rust//rust/platform:x86_64-unknown-none": [], + "@rules_rust//rust/platform:x86_64-unknown-uefi": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + version = "0.23.6", + deps = [ + "@vendor_ts__tree-sitter-language-0.1.5//:tree_sitter_language", + "@vendor_ts__tree-sitter-python-0.23.6//:build_script_build", + ], +) + +cargo_build_script( + name = "_bs", + srcs = glob( + include = ["**/*.rs"], + allow_empty = True, + ), + compile_data = glob( + include = ["**"], + allow_empty = True, + exclude = [ + "**/* *", + "**/*.rs", + ".tmp_git_root/**/*", + "BUILD", + "BUILD.bazel", + "WORKSPACE", + "WORKSPACE.bazel", + ], + ), + crate_name = "build_script_build", + crate_root = "bindings/rust/build.rs", + data = glob( + include = ["**"], + allow_empty = True, + exclude = [ + "**/* *", + ".tmp_git_root/**/*", + "BUILD", + "BUILD.bazel", + "WORKSPACE", + "WORKSPACE.bazel", + ], + ), + edition = "2021", + pkg_name = "tree-sitter-python", + rustc_env_files = [ + ":cargo_toml_env_vars", + ], + rustc_flags = [ + "--cap-lints=allow", + ], + tags = [ + "cargo-bazel", + "crate-name=tree-sitter-python", + "manual", + "noclippy", + "norustfmt", + ], + version = "0.23.6", + visibility = ["//visibility:private"], + deps = [ + "@vendor_ts__cc-1.2.61//:cc", + ], +) + +alias( + name = "build_script_build", + actual = ":_bs", + tags = ["manual"], +) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-ql-0.23.1.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-ql-0.23.1.bazel index 0b7ce3a9a29..6d68d04cf00 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-ql-0.23.1.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-ql-0.23.1.bazel @@ -155,7 +155,7 @@ cargo_build_script( version = "0.23.1", visibility = ["//visibility:private"], deps = [ - "@vendor_ts__cc-1.2.37//:cc", + "@vendor_ts__cc-1.2.61//:cc", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-ruby-0.23.1.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-ruby-0.23.1.bazel index f939b4b9493..855b7513311 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-ruby-0.23.1.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.tree-sitter-ruby-0.23.1.bazel @@ -155,7 +155,7 @@ cargo_build_script( version = "0.23.1", visibility = ["//visibility:private"], deps = [ - "@vendor_ts__cc-1.2.37//:cc", + "@vendor_ts__cc-1.2.61//:cc", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.zstd-sys-2.0.16+zstd.1.5.7.bazel b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.zstd-sys-2.0.16+zstd.1.5.7.bazel index 32a06f94788..f2a70faf464 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.zstd-sys-2.0.16+zstd.1.5.7.bazel +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/BUILD.zstd-sys-2.0.16+zstd.1.5.7.bazel @@ -165,7 +165,7 @@ cargo_build_script( version = "2.0.16+zstd.1.5.7", visibility = ["//visibility:private"], deps = [ - "@vendor_ts__cc-1.2.37//:cc", + "@vendor_ts__cc-1.2.61//:cc", "@vendor_ts__pkg-config-0.3.32//:pkg_config", ], ) diff --git a/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl b/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl index bf11bc6c82f..d1da77819f3 100644 --- a/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl +++ b/misc/bazel/3rdparty/tree_sitter_extractors_deps/defs.bzl @@ -303,7 +303,7 @@ _NORMAL_DEPENDENCIES = { "serde_json": Label("@vendor_ts__serde_json-1.0.145//:serde_json"), "tracing": Label("@vendor_ts__tracing-0.1.41//:tracing"), "tracing-subscriber": Label("@vendor_ts__tracing-subscriber-0.3.20//:tracing_subscriber"), - "tree-sitter": Label("@vendor_ts__tree-sitter-0.25.9//:tree_sitter"), + "tree-sitter": Label("@vendor_ts__tree-sitter-0.26.8//:tree_sitter"), "tree-sitter-embedded-template": Label("@vendor_ts__tree-sitter-embedded-template-0.25.0//:tree_sitter_embedded_template"), "tree-sitter-ruby": Label("@vendor_ts__tree-sitter-ruby-0.23.1//:tree_sitter_ruby"), }, @@ -381,10 +381,28 @@ _NORMAL_DEPENDENCIES = { "serde_json": Label("@vendor_ts__serde_json-1.0.145//:serde_json"), "tracing": Label("@vendor_ts__tracing-0.1.41//:tracing"), "tracing-subscriber": Label("@vendor_ts__tracing-subscriber-0.3.20//:tracing_subscriber"), - "tree-sitter": Label("@vendor_ts__tree-sitter-0.25.9//:tree_sitter"), + "tree-sitter": Label("@vendor_ts__tree-sitter-0.26.8//:tree_sitter"), "zstd": Label("@vendor_ts__zstd-0.13.3//:zstd"), }, }, + "shared/yeast": { + _COMMON_CONDITION: { + "clap": Label("@vendor_ts__clap-4.5.48//:clap"), + "serde": Label("@vendor_ts__serde-1.0.228//:serde"), + "serde_json": Label("@vendor_ts__serde_json-1.0.145//:serde_json"), + "serde_yaml": Label("@vendor_ts__serde_yaml-0.9.34-deprecated//:serde_yaml"), + "tree-sitter": Label("@vendor_ts__tree-sitter-0.26.8//:tree_sitter"), + "tree-sitter-python": Label("@vendor_ts__tree-sitter-python-0.23.6//:tree_sitter_python"), + "tree-sitter-ruby": Label("@vendor_ts__tree-sitter-ruby-0.23.1//:tree_sitter_ruby"), + }, + }, + "shared/yeast-macros": { + _COMMON_CONDITION: { + "proc-macro2": Label("@vendor_ts__proc-macro2-1.0.101//:proc_macro2"), + "quote": Label("@vendor_ts__quote-1.0.41//:quote"), + "syn": Label("@vendor_ts__syn-2.0.106//:syn"), + }, + }, } _NORMAL_ALIASES = { @@ -411,6 +429,14 @@ _NORMAL_ALIASES = { _COMMON_CONDITION: { }, }, + "shared/yeast": { + _COMMON_CONDITION: { + }, + }, + "shared/yeast-macros": { + _COMMON_CONDITION: { + }, + }, } _NORMAL_DEV_DEPENDENCIES = { @@ -431,6 +457,10 @@ _NORMAL_DEV_DEPENDENCIES = { "tree-sitter-ql": Label("@vendor_ts__tree-sitter-ql-0.23.1//:tree_sitter_ql"), }, }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _NORMAL_DEV_ALIASES = { @@ -448,6 +478,10 @@ _NORMAL_DEV_ALIASES = { _COMMON_CONDITION: { }, }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _PROC_MACRO_DEPENDENCIES = { @@ -463,6 +497,10 @@ _PROC_MACRO_DEPENDENCIES = { }, "shared/tree-sitter-extractor": { }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _PROC_MACRO_ALIASES = { @@ -478,6 +516,10 @@ _PROC_MACRO_ALIASES = { }, "shared/tree-sitter-extractor": { }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _PROC_MACRO_DEV_DEPENDENCIES = { @@ -493,6 +535,10 @@ _PROC_MACRO_DEV_DEPENDENCIES = { }, "shared/tree-sitter-extractor": { }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _PROC_MACRO_DEV_ALIASES = { @@ -510,6 +556,10 @@ _PROC_MACRO_DEV_ALIASES = { _COMMON_CONDITION: { }, }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _BUILD_DEPENDENCIES = { @@ -525,6 +575,10 @@ _BUILD_DEPENDENCIES = { }, "shared/tree-sitter-extractor": { }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _BUILD_ALIASES = { @@ -540,6 +594,10 @@ _BUILD_ALIASES = { }, "shared/tree-sitter-extractor": { }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _BUILD_PROC_MACRO_DEPENDENCIES = { @@ -555,6 +613,10 @@ _BUILD_PROC_MACRO_DEPENDENCIES = { }, "shared/tree-sitter-extractor": { }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _BUILD_PROC_MACRO_ALIASES = { @@ -570,6 +632,10 @@ _BUILD_PROC_MACRO_ALIASES = { }, "shared/tree-sitter-extractor": { }, + "shared/yeast": { + }, + "shared/yeast-macros": { + }, } _CONDITIONS = { @@ -923,12 +989,12 @@ def crate_repositories(): maybe( http_archive, - name = "vendor_ts__cc-1.2.37", - sha256 = "65193589c6404eb80b450d618eaf9a2cafaaafd57ecce47370519ef674a7bd44", + name = "vendor_ts__cc-1.2.61", + sha256 = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d", type = "tar.gz", - urls = ["https://static.crates.io/crates/cc/1.2.37/download"], - strip_prefix = "cc-1.2.37", - build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.cc-1.2.37.bazel"), + urls = ["https://static.crates.io/crates/cc/1.2.61/download"], + strip_prefix = "cc-1.2.61", + build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.cc-1.2.61.bazel"), ) maybe( @@ -1373,12 +1439,12 @@ def crate_repositories(): maybe( http_archive, - name = "vendor_ts__find-msvc-tools-0.1.1", - sha256 = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d", + name = "vendor_ts__find-msvc-tools-0.1.9", + sha256 = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582", type = "tar.gz", - urls = ["https://static.crates.io/crates/find-msvc-tools/0.1.1/download"], - strip_prefix = "find-msvc-tools-0.1.1", - build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.find-msvc-tools-0.1.1.bazel"), + urls = ["https://static.crates.io/crates/find-msvc-tools/0.1.9/download"], + strip_prefix = "find-msvc-tools-0.1.9", + build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.find-msvc-tools-0.1.9.bazel"), ) maybe( @@ -3363,12 +3429,12 @@ def crate_repositories(): maybe( http_archive, - name = "vendor_ts__tree-sitter-0.25.9", - sha256 = "ccd2a058a86cfece0bf96f7cce1021efef9c8ed0e892ab74639173e5ed7a34fa", + name = "vendor_ts__tree-sitter-0.26.8", + sha256 = "887bd495d0582c5e3e0d8ece2233666169fa56a9644d172fc22ad179ab2d0538", type = "tar.gz", - urls = ["https://static.crates.io/crates/tree-sitter/0.25.9/download"], - strip_prefix = "tree-sitter-0.25.9", - build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.tree-sitter-0.25.9.bazel"), + urls = ["https://static.crates.io/crates/tree-sitter/0.26.8/download"], + strip_prefix = "tree-sitter-0.26.8", + build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.tree-sitter-0.26.8.bazel"), ) maybe( @@ -3401,6 +3467,16 @@ def crate_repositories(): build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.tree-sitter-language-0.1.5.bazel"), ) + maybe( + http_archive, + name = "vendor_ts__tree-sitter-python-0.23.6", + sha256 = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04", + type = "tar.gz", + urls = ["https://static.crates.io/crates/tree-sitter-python/0.23.6/download"], + strip_prefix = "tree-sitter-python-0.23.6", + build_file = Label("//misc/bazel/3rdparty/tree_sitter_extractors_deps:BUILD.tree-sitter-python-0.23.6.bazel"), + ) + maybe( http_archive, name = "vendor_ts__tree-sitter-ql-0.23.1", @@ -4152,13 +4228,15 @@ def crate_repositories(): struct(repo = "vendor_ts__serde-1.0.228", is_dev_dep = False), struct(repo = "vendor_ts__serde_json-1.0.145", is_dev_dep = False), struct(repo = "vendor_ts__serde_with-3.14.1", is_dev_dep = False), + struct(repo = "vendor_ts__serde_yaml-0.9.34-deprecated", is_dev_dep = False), struct(repo = "vendor_ts__syn-2.0.106", is_dev_dep = False), struct(repo = "vendor_ts__toml-0.9.7", is_dev_dep = False), struct(repo = "vendor_ts__tracing-0.1.41", is_dev_dep = False), struct(repo = "vendor_ts__tracing-flame-0.2.0", is_dev_dep = False), struct(repo = "vendor_ts__tracing-subscriber-0.3.20", is_dev_dep = False), - struct(repo = "vendor_ts__tree-sitter-0.25.9", is_dev_dep = False), + struct(repo = "vendor_ts__tree-sitter-0.26.8", is_dev_dep = False), struct(repo = "vendor_ts__tree-sitter-embedded-template-0.25.0", is_dev_dep = False), + struct(repo = "vendor_ts__tree-sitter-python-0.23.6", is_dev_dep = False), struct(repo = "vendor_ts__tree-sitter-ruby-0.23.1", is_dev_dep = False), struct(repo = "vendor_ts__triomphe-0.1.14", is_dev_dep = False), struct(repo = "vendor_ts__ungrammar-1.16.1", is_dev_dep = False), diff --git a/shared/tree-sitter-extractor/BUILD.bazel b/shared/tree-sitter-extractor/BUILD.bazel index ee19d8e5c77..5539641e90e 100644 --- a/shared/tree-sitter-extractor/BUILD.bazel +++ b/shared/tree-sitter-extractor/BUILD.bazel @@ -12,7 +12,9 @@ rust_library( compile_data = [ "src/generator/prefix.dbscheme", ], - deps = all_crate_deps(), + deps = all_crate_deps() + [ + "//shared/yeast", + ], ) alias( diff --git a/shared/yeast-macros/BUILD.bazel b/shared/yeast-macros/BUILD.bazel new file mode 100644 index 00000000000..71bc6eb288a --- /dev/null +++ b/shared/yeast-macros/BUILD.bazel @@ -0,0 +1,12 @@ +load("@rules_rust//rust:defs.bzl", "rust_proc_macro") +load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps") + +exports_files(["Cargo.toml"]) + +rust_proc_macro( + name = "yeast-macros", + srcs = glob(["src/**/*.rs"]), + aliases = aliases(), + visibility = ["//visibility:public"], + deps = all_crate_deps(), +) diff --git a/shared/yeast/BUILD.bazel b/shared/yeast/BUILD.bazel new file mode 100644 index 00000000000..fe0b01bb87b --- /dev/null +++ b/shared/yeast/BUILD.bazel @@ -0,0 +1,18 @@ +load("@rules_rust//rust:defs.bzl", "rust_library") +load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps") + +exports_files(["Cargo.toml"]) + +rust_library( + name = "yeast", + srcs = glob( + ["src/**/*.rs"], + exclude = ["src/bin/**"], + ), + aliases = aliases(), + proc_macro_deps = [ + "//shared/yeast-macros", + ], + visibility = ["//visibility:public"], + deps = all_crate_deps(), +) From a0a0e9e9a75b3290dbd662b2366fed8c03f15168 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 6 May 2026 11:45:53 +0000 Subject: [PATCH 11/11] yeast: Add test for chained rules with output-only kinds Adds a regression test verifying that desugaring rules can chain across output-only node kinds: a first rule rewrites an input kind to an output-only kind, and a second rule then rewrites that output-only kind into another output-only kind. This exercises the schema lookup for query patterns whose root kind is not present in the input tree-sitter grammar. --- shared/yeast/tests/node-types.yml | 12 ++++++++++- shared/yeast/tests/test.rs | 35 +++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/shared/yeast/tests/node-types.yml b/shared/yeast/tests/node-types.yml index 978ef147543..8416d377ee8 100644 --- a/shared/yeast/tests/node-types.yml +++ b/shared/yeast/tests/node-types.yml @@ -4,7 +4,7 @@ named: program: - stmt*: [assignment, call, identifier, for] + stmt*: [assignment, call, identifier, for, first_node, second_node] assignment: left: [identifier, left_assignment_list] @@ -50,6 +50,16 @@ named: identifier: integer: + # Output-only kinds, used by tests of chained desugaring rules. + # Neither exists in the input tree-sitter-ruby grammar. + first_node: + left: [identifier, integer] + right: [identifier, integer] + + second_node: + left: [identifier, integer] + right: [identifier, integer] + unnamed: - "=" - "," diff --git a/shared/yeast/tests/test.rs b/shared/yeast/tests/test.rs index 406c990a8bd..e4485857bff 100644 --- a/shared/yeast/tests/test.rs +++ b/shared/yeast/tests/test.rs @@ -347,6 +347,41 @@ fn test_shorthand_rule() { ); } +#[test] +fn test_chained_rules_output_only_kind() { + // Exercise rule chaining where an intermediate kind exists only in the + // output schema (not in the input tree-sitter grammar): + // assignment → first_node (input → output-only) + // first_node → second_node (output-only → output-only) + // The matcher must look up `first_node` against the schema, which only + // knows about it via the YAML node-types file. + let assignment_to_first = yeast::rule!( + (assignment + left: (_) @left + right: (_) @right + ) + => first_node + ); + let first_to_second = yeast::rule!( + (first_node + left: (_) @left + right: (_) @right + ) + => second_node + ); + + let dump = run_and_dump("x = 1", vec![assignment_to_first, first_to_second]); + assert_dump_eq( + &dump, + r#" + program + second_node + left: identifier "x" + right: integer "1" + "#, + ); +} + // ---- Cursor tests ---- #[test]