unified: vendor in tree-sitter-swift

This commit is contained in:
Taus
2026-05-08 13:41:14 +00:00
parent 36554d160c
commit 9f6bd88171
27 changed files with 599675 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
load("@rules_rust//cargo:defs.bzl", "cargo_build_script")
load("@rules_rust//rust:defs.bzl", "rust_library")
load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps")

package(default_visibility = ["//visibility:public"])

# This will run the build script from the root of the workspace, and
# collect the outputs.
cargo_build_script(
    name = "tree-sitter-swift-build",
    srcs = ["bindings/rust/build.rs"],
    # The build script compiles the generated C parser and the hand-written
    # external scanner under src/, so both must be visible at build time.
    data = glob([
        "src/**",
    ]),
    deps = all_crate_deps(
        build = True,
    ),
)

# The Rust bindings crate. compile_data lists everything the crate pulls in
# via include_str! (node-types.json, the queries) plus the grammar itself.
rust_library(
    name = "tree-sitter-swift",
    srcs = [
        "bindings/rust/lib.rs",
    ],
    aliases = aliases(),
    compile_data = glob([
        "src/**",
        "queries/**",
    ]) + [
        "grammar.js",
    ],
    proc_macro_deps = all_crate_deps(
        proc_macro = True,
    ),
    deps = [":tree-sitter-swift-build"] + all_crate_deps(
        normal = True,
    ),
)

exports_files(["Cargo.toml"])

View File

@@ -0,0 +1,21 @@
[package]
name = "tree-sitter-swift"
description = "Swift grammar for the tree-sitter parsing library (vendored copy for the unified extractor)"
version = "0.7.2"
keywords = ["incremental", "parsing", "swift"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/alex-pinkus/tree-sitter-swift"
edition = "2024"
license = "MIT"
# Compiles src/parser.c and src/scanner.c via the `cc` crate (see build-dependencies).
build = "bindings/rust/build.rs"

[lib]
path = "bindings/rust/lib.rs"

# When updating these dependencies, run `misc/bazel/3rdparty/update_cargo_deps.sh`
[dependencies]
tree-sitter-language = "0.1"

[build-dependencies]
cc = "1.2"

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 alex-pinkus
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,127 @@
![Parse rate badge](https://byob.yarr.is/alex-pinkus/tree-sitter-swift/parse_rate)
[![Crates.io badge](https://byob.yarr.is/alex-pinkus/tree-sitter-swift/crates_io_version)](https://crates.io/crates/tree-sitter-swift)
[![NPM badge](https://byob.yarr.is/alex-pinkus/tree-sitter-swift/npm_version)](https://www.npmjs.com/package/tree-sitter-swift)
[![Build](https://github.com/alex-pinkus/tree-sitter-swift/actions/workflows/top-repos.yml/badge.svg)](https://github.com/alex-pinkus/tree-sitter-swift/actions/workflows/top-repos.yml)
# tree-sitter-swift
This contains a [`tree-sitter`](https://tree-sitter.github.io/tree-sitter) grammar for the Swift programming language.
## Getting started
To use this parser to parse Swift code, you'll want to depend on either the Rust crate or the NPM package.
### Rust
To use the Rust crate, you'll add this to your `Cargo.toml`:
```
tree-sitter = "0.23.0"
tree-sitter-swift = "=0.7.0"
```
Then you can use a `tree-sitter` parser with the language declared here:
```
let mut parser = tree_sitter::Parser::new();
parser.set_language(tree_sitter_swift::language())?;
// ...
let tree = parser.parse(&my_source_code, None)
.ok_or_else(|| /* error handling code */)?;
```
### Javascript
To use this from NPM, you'll add similar dependencies to `package.json`:
```
"dependencies": {
"tree-sitter-swift": "0.7.0",
"tree-sitter": "^0.22.1"
}
```
Your usage of the parser will look like:
```
const Parser = require("tree-sitter");
const Swift = require("tree-sitter-swift");
const parser = new Parser();
parser.setLanguage(Swift);
// ...
const tree = parser.parse(mySourceCode);
```
### Editing the grammar
With this package checked out, a common workflow for editing the grammar will look something like:
1. Make a change to `grammar.ts`.
2. Run `npm install && npm test` to see whether the change has had impact on existing parsing behavior. The default
`npm test` target requires `valgrind` to be installed; if you do not have it installed, and do not wish to, you can
substitute `tree-sitter test` directly.
3. Run `tree-sitter parse` on some real Swift codebase and see whether (or where) it fails.
4. Use any failures to create new corpus test cases.
## Contributions
All contributions to this repository are welcome.
If said contribution is to check generated files (e.g., `parser.c`) into the repository, be aware that your contribution will not be accepted. Make sure to read the [FAQ entry](https://github.com/alex-pinkus/tree-sitter-swift?tab=readme-ov-file#where-is-your-parserc) and the [prior](https://github.com/alex-pinkus/tree-sitter-swift/issues/362) [discussions](https://github.com/alex-pinkus/tree-sitter-swift/pull/315) and [compromises](https://github.com/alex-pinkus/tree-sitter-swift/issues/149) that have occurred already on this topic.
## Using tree-sitter-swift in Web Assembly
To use tree-sitter-swift with the WebAssembly (web) bindings for tree-sitter — which are likely to be more
up to date than the published node module (see the [web bindings README](https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/README.md)) — follow the instructions below:
1. Install the node modules `npm install web-tree-sitter tree-sitter-swift`
2. Run the tree-sitter cli to create the wasm bundle
```sh
$ npx tree-sitter build --wasm ./node_modules/tree-sitter-swift
```
3. Boot tree-sitter wasm like this.
```js
const Parser = require("web-tree-sitter");
async function run() {
//needs to happen first
await Parser.init();
//wait for the load of swift
const Swift = await Parser.Language.load("./tree-sitter-swift.wasm");
const parser = new Parser();
parser.setLanguage(Swift);
//Parse your swift code here.
const tree = parser.parse('print("Hello, World!")');
}
//if you want to run this
run().then(console.log, console.error);
```
## Frequently asked questions
### Where is your `parser.c`?
This repository currently omits most of the code that is autogenerated during a build. This means, for instance, that
`grammar.json` and `parser.c` are both only available following a build. It also significantly reduces noise during
diffs.
The side benefit of not checking in `parser.c` is that you can guarantee backwards compatibility. Parsers generated by
the tree-sitter CLI aren't always backwards compatible. If you need a parser, generate it yourself using the CLI; all
the information to do so is available in this package. By doing that, you'll also know for sure that your parser version
and your library version are compatible.
If you need a `parser.c`, and you don't care about the tree-sitter version, but you don't have a local setup that would
allow you to obtain the parser, you can just download one from a recent workflow run in this package. To do so:
- Go to the [GitHub actions page](https://github.com/alex-pinkus/tree-sitter-swift/actions) for this
repository.
- Click on the "Publish `grammar.json` and `parser.c`" action for the appropriate commit.
- Go down to `Artifacts` and click on `generated-parser-src`. All the relevant parser files will be available in your
download.

View File

@@ -0,0 +1,44 @@
{
"targets": [
{
"target_name": "tree_sitter_swift_binding",
"dependencies": [
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
],
"include_dirs": [
"src",
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
# NOTE: if your language has an external scanner, add it here.
"src/scanner.c",
],
"cflags_c": [
"-std=c11",
],
"actions": [
{
"action_name": "wait_for_tree_sitter",
"action": ["node", "scripts/wait-for-tree-sitter.js"],
"inputs": [],
"outputs": ["node_modules/tree-sitter-cli"]
},
{
"action_name": "generate_header_files",
"inputs": [
"grammar.js",
"node_modules/tree-sitter-cli"
],
"outputs": [
"src/grammar.json",
"src/node-types.json",
"src/parser.c",
"src/tree_sitter",
],
"action": ["tree-sitter", "generate", "--no-bindings"],
}
]
}
]
}

View File

@@ -0,0 +1,20 @@
#include <napi.h>

typedef struct TSLanguage TSLanguage;

// Implemented by the generated parser (src/parser.c).
extern "C" TSLanguage *tree_sitter_swift();

// "tree-sitter", "language" hashed with BLAKE2
// Tagging the external lets consumers verify it really wraps a TSLanguage.
const napi_type_tag LANGUAGE_TYPE_TAG = {
  0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};

// Module entry point: exposes the grammar name and the tagged language pointer.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
  exports["name"] = Napi::String::New(env, "swift");
  auto language = Napi::External<TSLanguage>::New(env, tree_sitter_swift());
  language.TypeTag(&LANGUAGE_TYPE_TAG);
  exports["language"] = language;
  return exports;
}

NODE_API_MODULE(tree_sitter_swift_binding, Init)

View File

@@ -0,0 +1,7 @@
// Load the prebuilt (or locally compiled) native binding from the package root.
const root = require("path").join(__dirname, "..", "..");
module.exports = require("node-gyp-build")(root);

// node-types.json only exists after the parser has been generated, so its
// absence is deliberately tolerated and nodeTypeInfo is left undefined.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

View File

@@ -0,0 +1,19 @@
/// Build script: compiles the generated C parser and the hand-written
/// external scanner into the static library linked by the Rust bindings.
fn main() {
    let source_dir = std::path::Path::new("src");

    let mut build = cc::Build::new();
    build.std("c11").include(source_dir);

    // MSVC needs an explicit switch to read the sources as UTF-8.
    #[cfg(target_env = "msvc")]
    build.flag("-utf-8");

    // Both C translation units are handled identically: register the file
    // with the compiler and ask cargo to rebuild when it changes.
    for file_name in ["parser.c", "scanner.c"] {
        let path = source_dir.join(file_name);
        build.file(&path);
        println!("cargo:rerun-if-changed={}", path.to_str().unwrap());
    }

    build.compile("tree-sitter-swift");
}

View File

@@ -0,0 +1,68 @@
//! This crate provides Swift language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = r#"
//! "#;
//! let mut parser = tree_sitter::Parser::new();
//! let language = tree_sitter_swift::LANGUAGE;
//! parser
//!     .set_language(&language.into())
//!     .expect("Error loading Swift parser");
//! let tree = parser.parse(code, None).unwrap();
//! assert!(!tree.root_node().has_error());
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/

use tree_sitter_language::LanguageFn;

unsafe extern "C" {
    // Provided by the generated C parser compiled by bindings/rust/build.rs.
    fn tree_sitter_swift() -> *const ();
}

/// The tree-sitter [`LanguageFn`] for this grammar.
pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_swift) };

/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");

/// Syntax-highlighting queries for this grammar.
pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");

/// Language-injection queries (regex literals, comments) for this grammar.
pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");

/// Local variable/scope queries for this grammar.
pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");

/// Code-navigation ("tags") queries for this grammar.
pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");

#[cfg(test)]
mod tests {
    // Smoke test: the generated language can be loaded by a parser at all.
    #[test]
    fn test_can_load_grammar() {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&super::LANGUAGE.into())
            .expect("Error loading Swift parser");
    }

    // Smoke test: a trivial assignment parses to the expected tree shape.
    #[test]
    fn test_can_parse_basic_file() {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&super::LANGUAGE.into())
            .expect("Error loading Swift parser");
        let tree = parser
            .parse("_ = \"Hello!\"\n", None)
            .expect("Unable to parse!");
        assert_eq!(
            "(source_file (assignment target: (directly_assignable_expression (simple_identifier)) result: (line_string_literal text: (line_str_text))))",
            tree.root_node().to_sexp(),
        );
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,68 @@
{
"name": "tree-sitter-swift",
"version": "0.7.2",
"description": "A tree-sitter grammar for the Swift programming language.",
"main": "bindings/node/index.js",
"types": "bindings/node",
"scripts": {
"install": "node-gyp-build",
"prestart": "tree-sitter build --wasm",
"start": "tree-sitter playground",
"test": "node --test bindings/node/*_test.js"
},
"repository": {
"type": "git",
"url": "git+https://github.com/alex-pinkus/tree-sitter-swift.git"
},
"tree-sitter": [
{
"scope": "source.swift",
"file-types": [
"swift"
],
"injection-regex": "swift",
"highlights": "queries/highlights.scm",
"locals": "queries/locals.scm",
"injections": "queries/injections.scm"
}
],
"keywords": [
"parser",
"swift"
],
"files": [
"grammar.js",
"tree-sitter.json",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"scripts/*",
"src/**"
],
"author": "Alex Pinkus <alex.pinkus@gmail.com>",
"license": "MIT",
"bugs": {
"url": "https://github.com/alex-pinkus/tree-sitter-swift/issues"
},
"homepage": "https://github.com/alex-pinkus/tree-sitter-swift#readme",
"dependencies": {
"node-addon-api": "^8.0.0",
"node-gyp-build": "^4.8.0",
"tree-sitter-cli": "^0.23",
"which": "2.0.2"
},
"peerDependencies": {
"tree-sitter": "^0.22.1"
},
"peerDependenciesMeta": {
"tree-sitter": {
"optional": true
}
},
"devDependencies": {
"node-gyp": "^10.0.1",
"prebuildify": "^6.0.0",
"prettier": "2.3.2"
}
}

View File

@@ -0,0 +1,35 @@
; format-ignore
[
(protocol_body) ; protocol Foo { ... }
(class_body) ; class Foo { ... }
(enum_class_body) ; enum Foo { ... }
(function_body) ; func Foo (...) {...}
(computed_property) ; { ... }
(computed_getter) ; get { ... }
(computed_setter) ; set { ... }
(do_statement)
(if_statement)
(for_statement)
(switch_statement)
(while_statement)
(guard_statement)
(switch_entry)
(type_parameters) ; x<Foo>
(tuple_type) ; (...)
(array_type) ; [String]
(dictionary_type) ; [Foo: Bar]
(call_expression) ; callFunc(...)
(tuple_expression) ; ( foo + bar )
(array_literal) ; [ foo, bar ]
(dictionary_literal) ; [ foo: bar, x: y ]
(lambda_literal)
(willset_didset_block)
(willset_clause)
(didset_clause)
(import_declaration)+
] @fold

View File

@@ -0,0 +1,336 @@
[
"."
";"
":"
","
] @punctuation.delimiter
[
"("
")"
"["
"]"
"{"
"}"
] @punctuation.bracket
; Identifiers
(type_identifier) @type
[
(self_expression)
(super_expression)
] @variable.builtin
; Declarations
[
"func"
"deinit"
] @keyword.function
[
(visibility_modifier)
(member_modifier)
(function_modifier)
(property_modifier)
(parameter_modifier)
(inheritance_modifier)
(mutation_modifier)
] @keyword.modifier
(simple_identifier) @variable
(function_declaration
(simple_identifier) @function.method)
(protocol_function_declaration
name: (simple_identifier) @function.method)
(init_declaration
"init" @constructor)
(parameter
external_name: (simple_identifier) @variable.parameter)
(parameter
name: (simple_identifier) @variable.parameter)
(type_parameter
(type_identifier) @variable.parameter)
(inheritance_constraint
(identifier
(simple_identifier) @variable.parameter))
(equality_constraint
(identifier
(simple_identifier) @variable.parameter))
[
"protocol"
"extension"
"indirect"
"nonisolated"
"override"
"convenience"
"required"
"some"
"any"
"weak"
"unowned"
"didSet"
"willSet"
"subscript"
"let"
"var"
(throws)
(where_keyword)
(getter_specifier)
(setter_specifier)
(modify_specifier)
(else)
(as_operator)
] @keyword
[
"enum"
"struct"
"class"
"typealias"
] @keyword.type
[
"async"
"await"
] @keyword.coroutine
(shebang_line) @keyword.directive
(class_body
(property_declaration
(pattern
(simple_identifier) @variable.member)))
(protocol_property_declaration
(pattern
(simple_identifier) @variable.member))
(navigation_expression
(navigation_suffix
(simple_identifier) @variable.member))
(value_argument
name: (value_argument_label
(simple_identifier) @variable.member))
(import_declaration
"import" @keyword.import)
(enum_entry
"case" @keyword)
(modifiers
(attribute
"@" @attribute
(user_type
(type_identifier) @attribute)))
; Function calls
(call_expression
(simple_identifier) @function.call) ; foo()
(call_expression
; foo.bar.baz(): highlight the baz()
(navigation_expression
(navigation_suffix
(simple_identifier) @function.call)))
(call_expression
(prefix_expression
(simple_identifier) @function.call)) ; .foo()
((navigation_expression
(simple_identifier) @type) ; SomeType.method(): highlight SomeType as a type
(#match? @type "^[A-Z]"))
(directive) @keyword.directive
; See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure/#Keywords-and-Punctuation
[
(diagnostic)
(availability_condition)
(playground_literal)
(key_path_string_expression)
(selector_expression)
(external_macro_definition)
] @function.macro
(special_literal) @constant.macro
; Statements
(for_statement
"for" @keyword.repeat)
(for_statement
"in" @keyword.repeat)
[
"while"
"repeat"
"continue"
"break"
] @keyword.repeat
(guard_statement
"guard" @keyword.conditional)
(if_statement
"if" @keyword.conditional)
(switch_statement
"switch" @keyword.conditional)
(switch_entry
"case" @keyword)
(switch_entry
"fallthrough" @keyword)
(switch_entry
(default_keyword) @keyword)
"return" @keyword.return
(ternary_expression
[
"?"
":"
] @keyword.conditional.ternary)
[
(try_operator)
"do"
(throw_keyword)
(catch_keyword)
] @keyword.exception
(statement_label) @label
; Comments
[
(comment)
(multiline_comment)
] @comment @spell
((comment) @comment.documentation
(#match? @comment.documentation "^///[^/]"))
((comment) @comment.documentation
(#match? @comment.documentation "^///$"))
((multiline_comment) @comment.documentation
(#match? @comment.documentation "^/[*][*][^*].*[*]/$"))
; String literals
(line_str_text) @string
(str_escaped_char) @string.escape
(multi_line_str_text) @string
(raw_str_part) @string
(raw_str_end_part) @string
(line_string_literal
[
"\\("
")"
] @punctuation.special)
(multi_line_string_literal
[
"\\("
")"
] @punctuation.special)
(raw_str_interpolation
[
(raw_str_interpolation_start)
")"
] @punctuation.special)
[
"\""
"\"\"\""
] @string
; Lambda literals
(lambda_literal
"in" @keyword.operator)
; Basic literals
[
(integer_literal)
(hex_literal)
(oct_literal)
(bin_literal)
] @number
(real_literal) @number.float
(boolean_literal) @boolean
"nil" @constant.builtin
(wildcard_pattern) @character.special
; Regex literals
(regex_literal) @string.regexp
; Operators
(custom_operator) @operator
[
"+"
"-"
"*"
"/"
"%"
"="
"+="
"-="
"*="
"/="
"<"
">"
"<<"
">>"
"<="
">="
"++"
"--"
"^"
"&"
"&&"
"|"
"||"
"~"
"%="
"!="
"!=="
"=="
"==="
"?"
"??"
"->"
"..<"
"..."
(bang)
] @operator
(type_arguments
[
"<"
">"
] @punctuation.bracket)

View File

@@ -0,0 +1,123 @@
; format-ignore
[
; ... refers to the section that will get affected by this indent.begin capture
(protocol_body) ; protocol Foo { ... }
(class_body) ; class Foo { ... }
(enum_class_body) ; enum Foo { ... }
(function_declaration) ; func Foo (...) {...}
(init_declaration) ; init(...) {...}
(deinit_declaration) ; deinit {...}
(computed_property) ; { ... }
(subscript_declaration) ; subscript Foo(...) { ... }
(computed_getter) ; get { ... }
(computed_setter) ; set { ... }
(assignment) ; a = b
(control_transfer_statement) ; return ...
(for_statement)
(while_statement)
(repeat_while_statement)
(do_statement)
(if_statement)
(switch_statement)
(guard_statement)
(type_parameters) ; x<Foo>
(tuple_type) ; (...)
(array_type) ; [String]
(dictionary_type) ; [Foo: Bar]
(call_expression) ; callFunc(...)
(tuple_expression) ; ( foo + bar )
(array_literal) ; [ foo, bar ]
(dictionary_literal) ; [ foo: bar, x: y ]
(lambda_literal)
(willset_didset_block)
(willset_clause)
(didset_clause)
] @indent.begin
(init_declaration) @indent.begin
(init_declaration
[
"init"
"("
] @indent.branch)
; indentation for init parameters
(init_declaration
")" @indent.branch @indent.end)
(init_declaration
(parameter) @indent.begin
(#set! indent.immediate))
; @something(...)
(modifiers
(attribute) @indent.begin)
(function_declaration
(modifiers
.
(attribute)
(_)* @indent.branch)
.
_ @indent.branch
(#not-kind-eq? @indent.branch "type_parameters" "parameter"))
(ERROR
[
"<"
"{"
"("
"["
]) @indent.begin
; if-elseif
(if_statement
(if_statement) @indent.dedent)
; case Foo:
; default Foo:
; @attribute default Foo:
(switch_entry
.
_ @indent.branch)
(function_declaration
")" @indent.branch)
(type_parameters
">" @indent.branch @indent.end .)
(tuple_expression
")" @indent.branch @indent.end)
(value_arguments
")" @indent.branch @indent.end)
(tuple_type
")" @indent.branch @indent.end)
(modifiers
(attribute
")" @indent.branch @indent.end))
[
"}"
"]"
] @indent.branch @indent.end
[
; (ERROR)
(comment)
(multiline_comment)
(raw_str_part)
(multi_line_string_literal)
] @indent.auto
(directive) @indent.ignore

View File

@@ -0,0 +1,10 @@
; Parse regex syntax within regex literals
((regex_literal) @injection.content
  (#set! injection.language "regex"))

; Treat comment bodies as the "comment" language so editors can highlight
; conventions like TODO/FIXME and URLs inside them
([
  (comment)
  (multiline_comment)
] @injection.content
  (#set! injection.language "comment"))

View File

@@ -0,0 +1,23 @@
; Definitions: imported module names and function names introduce bindings
(import_declaration
  (identifier) @local.definition.import)

(function_declaration
  name: (simple_identifier) @local.definition.function)

; Scopes
[
  (statements)
  (for_statement)
  (while_statement)
  (repeat_while_statement)
  (do_statement)
  (if_statement)
  (guard_statement)
  (switch_statement)
  (property_declaration)
  (function_declaration)
  (class_declaration)
  (protocol_declaration)
] @local.scope

View File

@@ -0,0 +1,66 @@
(protocol_declaration
declaration_kind: "protocol" @name
.
_ * @name
.
body: (protocol_body)
) @item
(class_declaration
declaration_kind: (
[
"actor"
"class"
"extension"
"enum"
"struct"
]
) @name
.
_ * @name
.
body: (_)
) @item
(init_declaration
name: "init" @name
.
_ * @name
.
body: (function_body)
) @item
(deinit_declaration
"deinit" @name) @item
(function_declaration
"func" @name
.
_ * @name
.
body: (function_body)
) @item
(class_body
(property_declaration
(value_binding_pattern) @name
name: (pattern) @name
(type_annotation)? @name
) @item
)
(enum_class_body
(property_declaration
(value_binding_pattern) @name
name: (pattern) @name
(type_annotation)? @name
) @item
)
(
(protocol_function_declaration) @name
) @item
(
(protocol_property_declaration) @name
) @item

View File

@@ -0,0 +1,51 @@
(class_declaration
name: (type_identifier) @name) @definition.class
(protocol_declaration
name: (type_identifier) @name) @definition.interface
(class_declaration
(class_body
[
(function_declaration
name: (simple_identifier) @name
)
(subscript_declaration
(parameter (simple_identifier) @name)
)
(init_declaration "init" @name)
(deinit_declaration "deinit" @name)
]
)
) @definition.method
(protocol_declaration
(protocol_body
[
(protocol_function_declaration
name: (simple_identifier) @name
)
(subscript_declaration
(parameter (simple_identifier) @name)
)
(init_declaration "init" @name)
]
)
) @definition.method
(class_declaration
(class_body
[
(property_declaration
(pattern (simple_identifier) @name)
)
]
)
) @definition.property
(property_declaration
(pattern (simple_identifier) @name)
) @definition.property
(function_declaration
name: (simple_identifier) @name) @definition.function

View File

@@ -0,0 +1,19 @@
; MARK: Structure
(function_declaration
body: (_) @function.inside) @function.around
; TODO: Classes/structs/enums
; MARK: Tests
; Only matches prefix test. Other conventions
; might be nice to add!
(function_declaration
name: (simple_identifier) @_name
(#match? @_name "^test")
)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,929 @@
#include "tree_sitter/parser.h"
#include <string.h>
#include <wctype.h>
#define TOKEN_COUNT 33
enum TokenType {
BLOCK_COMMENT,
RAW_STR_PART,
RAW_STR_CONTINUING_INDICATOR,
RAW_STR_END_PART,
IMPLICIT_SEMI,
EXPLICIT_SEMI,
ARROW_OPERATOR,
DOT_OPERATOR,
CONJUNCTION_OPERATOR,
DISJUNCTION_OPERATOR,
NIL_COALESCING_OPERATOR,
EQUAL_SIGN,
EQ_EQ,
PLUS_THEN_WS,
MINUS_THEN_WS,
BANG,
THROWS_KEYWORD,
RETHROWS_KEYWORD,
DEFAULT_KEYWORD,
WHERE_KEYWORD,
ELSE_KEYWORD,
CATCH_KEYWORD,
AS_KEYWORD,
AS_QUEST,
AS_BANG,
ASYNC_KEYWORD,
CUSTOM_OPERATOR,
HASH_SYMBOL,
DIRECTIVE_IF,
DIRECTIVE_ELSEIF,
DIRECTIVE_ELSE,
DIRECTIVE_ENDIF,
FAKE_TRY_BANG
};
#define OPERATOR_COUNT 20
const char* OPERATORS[OPERATOR_COUNT] = {
"->",
".",
"&&",
"||",
"??",
"=",
"==",
"+",
"-",
"!",
"throws",
"rethrows",
"default",
"where",
"else",
"catch",
"as",
"as?",
"as!",
"async"
};
enum IllegalTerminatorGroup {
ALPHANUMERIC,
OPERATOR_SYMBOLS,
OPERATOR_OR_DOT,
NON_WHITESPACE
};
const enum IllegalTerminatorGroup OP_ILLEGAL_TERMINATORS[OPERATOR_COUNT] = {
OPERATOR_SYMBOLS, // ->
OPERATOR_OR_DOT, // .
OPERATOR_SYMBOLS, // &&
OPERATOR_SYMBOLS, // ||
OPERATOR_SYMBOLS, // ??
OPERATOR_SYMBOLS, // =
OPERATOR_SYMBOLS, // ==
NON_WHITESPACE, // +
NON_WHITESPACE, // -
OPERATOR_SYMBOLS, // !
ALPHANUMERIC, // throws
ALPHANUMERIC, // rethrows
ALPHANUMERIC, // default
ALPHANUMERIC, // where
ALPHANUMERIC, // else
ALPHANUMERIC, // catch
ALPHANUMERIC, // as
OPERATOR_SYMBOLS, // as?
OPERATOR_SYMBOLS, // as!
ALPHANUMERIC // async
};
const enum TokenType OP_SYMBOLS[OPERATOR_COUNT] = {
ARROW_OPERATOR,
DOT_OPERATOR,
CONJUNCTION_OPERATOR,
DISJUNCTION_OPERATOR,
NIL_COALESCING_OPERATOR,
EQUAL_SIGN,
EQ_EQ,
PLUS_THEN_WS,
MINUS_THEN_WS,
BANG,
THROWS_KEYWORD,
RETHROWS_KEYWORD,
DEFAULT_KEYWORD,
WHERE_KEYWORD,
ELSE_KEYWORD,
CATCH_KEYWORD,
AS_KEYWORD,
AS_QUEST,
AS_BANG,
ASYNC_KEYWORD
};
const uint64_t OP_SYMBOL_SUPPRESSOR[OPERATOR_COUNT] = {
0, // ARROW_OPERATOR,
0, // DOT_OPERATOR,
0, // CONJUNCTION_OPERATOR,
0, // DISJUNCTION_OPERATOR,
0, // NIL_COALESCING_OPERATOR,
0, // EQUAL_SIGN,
0, // EQ_EQ,
0, // PLUS_THEN_WS,
0, // MINUS_THEN_WS,
1UL << FAKE_TRY_BANG, // BANG,
0, // THROWS_KEYWORD,
0, // RETHROWS_KEYWORD,
0, // DEFAULT_KEYWORD,
0, // WHERE_KEYWORD,
0, // ELSE_KEYWORD,
0, // CATCH_KEYWORD,
0, // AS_KEYWORD,
0, // AS_QUEST,
0, // AS_BANG,
0, // ASYNC_KEYWORD
};
#define RESERVED_OP_COUNT 31
const char* RESERVED_OPS[RESERVED_OP_COUNT] = {
"/",
"=",
"-",
"+",
"!",
"*",
"%",
"<",
">",
"&",
"|",
"^",
"?",
"~",
".",
"..",
"->",
"/*",
"*/",
"+=",
"-=",
"*=",
"/=",
"%=",
">>",
"<<",
"++",
"--",
"===",
"...",
"..<"
};
/*
 * Report whether `op` is a token that may continue an expression across an
 * implicit semicolon (newline). BANG is explicitly excluded: a postfix `!`
 * binds to the preceding expression and must not reach across a line break.
 */
static bool is_cross_semi_token(enum TokenType op) {
  return op == ARROW_OPERATOR
      || op == DOT_OPERATOR
      || op == CONJUNCTION_OPERATOR
      || op == DISJUNCTION_OPERATOR
      || op == NIL_COALESCING_OPERATOR
      || op == EQUAL_SIGN
      || op == EQ_EQ
      || op == PLUS_THEN_WS
      || op == MINUS_THEN_WS
      || op == THROWS_KEYWORD
      || op == RETHROWS_KEYWORD
      || op == DEFAULT_KEYWORD
      || op == WHERE_KEYWORD
      || op == ELSE_KEYWORD
      || op == CATCH_KEYWORD
      || op == AS_KEYWORD
      || op == AS_QUEST
      || op == AS_BANG
      || op == ASYNC_KEYWORD
      || op == CUSTOM_OPERATOR;
}
#define NON_CONSUMING_CROSS_SEMI_CHAR_COUNT 3
const uint32_t NON_CONSUMING_CROSS_SEMI_CHARS[NON_CONSUMING_CROSS_SEMI_CHAR_COUNT] = { '?', ':', '{' };
/**
* All possible results of having performed some sort of parsing.
*
* A parser can return a result along two dimensions:
* 1. Should the scanner continue trying to find another result?
* 2. Was some result produced by this parsing attempt?
*
* These are flattened into a single enum together. When the function returns one of the `TOKEN_FOUND` cases, it
* will always populate its `symbol_result` field. When it returns one of the `STOP_PARSING` cases, callers should
* immediately return (with the value, if there is one).
*/
enum ParseDirective {
CONTINUE_PARSING_NOTHING_FOUND,
CONTINUE_PARSING_TOKEN_FOUND,
CONTINUE_PARSING_SLASH_CONSUMED,
STOP_PARSING_NOTHING_FOUND,
STOP_PARSING_TOKEN_FOUND,
STOP_PARSING_END_OF_FILE
};
/*
 * Persistent scanner state: the number of '#' characters that opened the raw
 * string literal currently being scanned (0 when not inside a raw string).
 */
struct ScannerState {
  uint32_t ongoing_raw_str_hash_count;
};

/* Allocate a zero-initialized scanner state. */
void *tree_sitter_swift_external_scanner_create() {
  return calloc(1, sizeof(struct ScannerState));
}

/* Release the state allocated by `..._create`. */
void tree_sitter_swift_external_scanner_destroy(void *payload) {
  free(payload);
}

/* Return the scanner to its initial (not-inside-a-raw-string) state. */
void tree_sitter_swift_external_scanner_reset(void *payload) {
  struct ScannerState *state = (struct ScannerState *)payload;
  state->ongoing_raw_str_hash_count = 0;
}

/*
 * Write the state into `buffer` as 4 big-endian bytes; returns the number of
 * bytes written.
 */
unsigned tree_sitter_swift_external_scanner_serialize(void *payload, char *buffer) {
  struct ScannerState *state = (struct ScannerState *)payload;
  uint32_t hash_count = state->ongoing_raw_str_hash_count;
  buffer[0] = (char)((hash_count >> 24) & 0xff);
  buffer[1] = (char)((hash_count >> 16) & 0xff);
  buffer[2] = (char)((hash_count >> 8) & 0xff);
  buffer[3] = (char)(hash_count & 0xff);
  return 4;
}

/*
 * Restore the state previously written by `..._serialize`.
 *
 * Each byte is widened through uint8_t before shifting: `char` may be signed,
 * and casting a byte >= 0x80 directly to uint32_t would sign-extend it and
 * corrupt the reassembled count.
 *
 * tree-sitter calls deserialize with length 0 to mean "no saved state", so a
 * short buffer resets the state rather than leaving stale data behind.
 */
void tree_sitter_swift_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {
  struct ScannerState *state = (struct ScannerState *)payload;
  if (length < 4) {
    state->ongoing_raw_str_hash_count = 0;
    return;
  }
  uint32_t hash_count = (
    (((uint32_t)(uint8_t)buffer[0]) << 24) |
    (((uint32_t)(uint8_t)buffer[1]) << 16) |
    (((uint32_t)(uint8_t)buffer[2]) << 8) |
    ((uint32_t)(uint8_t)buffer[3])
  );
  state->ongoing_raw_str_hash_count = hash_count;
}
/* Consume the current lookahead character as part of the token (skip = false). */
static void advance(TSLexer *lexer) {
  lexer->advance(lexer, false);
}
/*
 * Whitespace test used by the scanner: a ';' terminates a statement just like
 * whitespace does, so it is folded into the same predicate.
 */
static bool should_treat_as_wspace(int32_t character) {
  if (character == (int32_t) ';') {
    return true;
  }
  return iswspace(character) != 0;
}
/* Tally how many of the fixed operator slots were flagged during the scan. */
static int32_t encountered_op_count(bool *encountered_operator) {
  int32_t total = 0;
  for (int i = 0; i < OPERATOR_COUNT; i++) {
    total += encountered_operator[i] ? 1 : 0;
  }
  return total;
}
/* True iff any reserved-operator slot holds the "fully matched" marker (2). */
static bool any_reserved_ops(uint8_t *encountered_reserved_ops) {
  bool found = false;
  for (int i = 0; i < RESERVED_OP_COUNT && !found; i++) {
    found = (encountered_reserved_ops[i] == 2);
  }
  return found;
}
/*
 * Decide whether `cur_char` is allowed at position `char_idx` of a custom
 * operator whose first character was `first_char`.
 *
 * The accepted character sets appear to mirror the `operator-head` /
 * `operator-character` productions of Swift's lexical grammar ("Lexical
 * Structure" in the Swift book) — ASCII operator symbols are valid anywhere,
 * a set of Unicode symbol ranges is valid anywhere, and combining-mark
 * ranges are valid only after the first character. TODO(review): confirm the
 * ranges against the current Swift specification.
 */
static bool is_legal_custom_operator(
  int32_t char_idx,
  int32_t first_char,
  int32_t cur_char
) {
  bool is_first_char = !char_idx;
  switch (cur_char) {
    /* ASCII operator symbols, legal at any position. */
    case '=':
    case '-':
    case '+':
    case '!':
    case '%':
    case '<':
    case '>':
    case '&':
    case '|':
    case '^':
    case '?':
    case '~':
      return true;
    case '.':
      // Grammar allows `.` for any operator that starts with `.`
      return is_first_char || first_char == '.';
    case '*':
    case '/':
      // Not listed in the grammar, but `/*` and `//` can't be the start of an operator since they start comments
      return char_idx != 1 || first_char != '/';
    default:
      /* Unicode symbol/punctuation ranges: legal at any position. */
      if (
        (cur_char >= 0x00A1 && cur_char <= 0x00A7) ||
        (cur_char == 0x00A9) ||
        (cur_char == 0x00AB) ||
        (cur_char == 0x00AC) ||
        (cur_char == 0x00AE) ||
        (cur_char >= 0x00B0 && cur_char <= 0x00B1) ||
        (cur_char == 0x00B6) ||
        (cur_char == 0x00BB) ||
        (cur_char == 0x00BF) ||
        (cur_char == 0x00D7) ||
        (cur_char == 0x00F7) ||
        (cur_char >= 0x2016 && cur_char <= 0x2017) ||
        (cur_char >= 0x2020 && cur_char <= 0x2027) ||
        (cur_char >= 0x2030 && cur_char <= 0x203E) ||
        (cur_char >= 0x2041 && cur_char <= 0x2053) ||
        (cur_char >= 0x2055 && cur_char <= 0x205E) ||
        (cur_char >= 0x2190 && cur_char <= 0x23FF) ||
        (cur_char >= 0x2500 && cur_char <= 0x2775) ||
        (cur_char >= 0x2794 && cur_char <= 0x2BFF) ||
        (cur_char >= 0x2E00 && cur_char <= 0x2E7F) ||
        (cur_char >= 0x3001 && cur_char <= 0x3003) ||
        (cur_char >= 0x3008 && cur_char <= 0x3020) ||
        (cur_char == 0x3030)
      ) {
        return true;
      } else if (
        /* Combining-mark ranges: legal only after the first character. */
        (cur_char >= 0x0300 && cur_char <= 0x036f) ||
        (cur_char >= 0x1DC0 && cur_char <= 0x1DFF) ||
        (cur_char >= 0x20D0 && cur_char <= 0x20FF) ||
        (cur_char >= 0xFE00 && cur_char <= 0xFE0F) ||
        (cur_char >= 0xFE20 && cur_char <= 0xFE2F) ||
        (cur_char >= 0xE0100 && cur_char <= 0xE01EF)
      ) {
        return !is_first_char;
      } else {
        return false;
      }
  }
}
// Scan the longest operator token at the current lexer position.
//
// Three matchers advance in lockstep, one character at a time:
//   - possible_operators[i]: can OPERATORS[i] still match? (seeded from
//     valid_symbols, so only operators the parser can accept are considered)
//   - reserved_operators[i]: can RESERVED_OPS[i] still match? A custom
//     operator that spells exactly a reserved operator must not be emitted.
//   - possible_custom_operator: is the text so far still a legal Swift
//     custom operator?
//
// `prior_char` is a character the caller already consumed ('\0' for none);
// when set, matching starts at string index 1 and the first character is
// taken from it instead of the lexer. `mark_end` controls whether the token
// end is committed via lexer->mark_end. On success the matched token is
// written to *symbol_result and true is returned.
static bool eat_operators(
    TSLexer *lexer,
    const bool *valid_symbols,
    bool mark_end,
    const int32_t prior_char,
    enum TokenType *symbol_result
) {
    bool possible_operators[OPERATOR_COUNT];
    uint8_t reserved_operators[RESERVED_OP_COUNT];
    // An operator starts out possible if its token is valid here and its
    // first character agrees with any already-consumed prior_char.
    for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
        possible_operators[op_idx] = valid_symbols[OP_SYMBOLS[op_idx]] && (!prior_char || OPERATORS[op_idx][0] == prior_char);
    }
    // NOTE(review): entries hold 0 (ruled out), 1 (still matching), or 2 (set
    // below when the reserved op completes on the current char). The exact
    // meaning of 1 vs 2 depends on any_reserved_ops, defined elsewhere in
    // this file -- confirm there.
    for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
        reserved_operators[op_idx] = !prior_char || RESERVED_OPS[op_idx][0] == prior_char;
    }
    bool possible_custom_operator = valid_symbols[CUSTOM_OPERATOR];
    int32_t first_char = prior_char ? prior_char : lexer->lookahead;
    int32_t last_examined_char = first_char;
    int32_t str_idx = prior_char ? 1 : 0;
    // Index into OPERATORS of the longest fully-matched operator so far.
    int32_t full_match = -1;
    while(true) {
        for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
            if (!possible_operators[op_idx]) {
                continue;
            }
            if (OPERATORS[op_idx][str_idx] == '\0') {
                // The whole operator matched. Make sure that the operator is
                // allowed to have the next character as its lookahead.
                enum IllegalTerminatorGroup illegal_terminators = OP_ILLEGAL_TERMINATORS[op_idx];
                switch (lexer->lookahead) {
                    // See "Operators":
                    // https://docs.swift.org/swift-book/ReferenceManual/LexicalStructure.html#ID418
                    case '/':
                    case '=':
                    case '-':
                    case '+':
                    case '!':
                    case '*':
                    case '%':
                    case '<':
                    case '>':
                    case '&':
                    case '|':
                    case '^':
                    case '?':
                    case '~':
                        if (illegal_terminators == OPERATOR_SYMBOLS) {
                            break;
                        } // Otherwise, intentionally fall through to the OPERATOR_OR_DOT case
                        // fall through
                    case '.':
                        if (illegal_terminators == OPERATOR_OR_DOT) {
                            break;
                        } // Otherwise, fall through to DEFAULT which checks its groups directly
                        // fall through
                    default:
                        if (iswalnum(lexer->lookahead) && illegal_terminators == ALPHANUMERIC) {
                            break;
                        }
                        if (!iswspace(lexer->lookahead) && illegal_terminators == NON_WHITESPACE) {
                            break;
                        }
                        // The lookahead is acceptable: record this as the best
                        // complete match so far (a longer one may overwrite it).
                        full_match = op_idx;
                        if (mark_end) {
                            lexer->mark_end(lexer);
                        }
                }
                possible_operators[op_idx] = false;
                continue;
            }
            if (OPERATORS[op_idx][str_idx] != lexer->lookahead) {
                possible_operators[op_idx] = false;
                continue;
            }
        }
        for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
            if (!reserved_operators[op_idx]) {
                continue;
            }
            if (RESERVED_OPS[op_idx][str_idx] == '\0') {
                // The text has extended past this reserved op, so it no
                // longer matches exactly.
                reserved_operators[op_idx] = 0;
                continue;
            }
            if (RESERVED_OPS[op_idx][str_idx] != lexer->lookahead) {
                reserved_operators[op_idx] = 0;
                continue;
            }
            if (RESERVED_OPS[op_idx][str_idx + 1] == '\0') {
                // The current character completes the reserved operator.
                reserved_operators[op_idx] = 2;
                continue;
            }
        }
        possible_custom_operator = possible_custom_operator && is_legal_custom_operator(
            str_idx,
            first_char,
            lexer->lookahead
        );
        uint32_t encountered_ops = encountered_op_count(possible_operators);
        if (encountered_ops == 0) {
            if (!possible_custom_operator) {
                // Nothing can match anymore; stop before consuming more input.
                break;
            } else if (mark_end && full_match == -1) {
                // Only a custom operator remains viable; keep extending the
                // token end for as long as it stays legal.
                lexer->mark_end(lexer);
            }
        }
        last_examined_char = lexer->lookahead;
        lexer->advance(lexer, false);
        str_idx += 1;
        if (encountered_ops == 0 && !is_legal_custom_operator(
            str_idx,
            first_char,
            lexer->lookahead
        )) {
            break;
        }
    }
    if (full_match != -1) {
        // We have a match -- first see if that match has a symbol that suppresses it. For example, in `try!`, we do not
        // want to emit the `!` as a symbol in our scanner, because we want the parser to have the chance to parse it as
        // an immediate token.
        // OP_SYMBOL_SUPPRESSOR[full_match] is a bitmask indexed by token id.
        uint64_t suppressing_symbols = OP_SYMBOL_SUPPRESSOR[full_match];
        if (suppressing_symbols) {
            for (uint64_t suppressor = 0; suppressor < TOKEN_COUNT; suppressor++) {
                if (!(suppressing_symbols & 1ULL << suppressor)) {
                    continue;
                }
                // The suppressing symbol is valid in this position, so skip it.
                if (valid_symbols[suppressor]) {
                    return false;
                }
            }
        }
        *symbol_result = OP_SYMBOLS[full_match];
        return true;
    }
    if (possible_custom_operator && !any_reserved_ops(reserved_operators)) {
        // NOTE(review): a trailing `<` is left unconsumed unless followed by
        // whitespace -- presumably so it can instead open a generic-argument
        // list; confirm against grammar.js before relying on this.
        if ((last_examined_char != '<' || iswspace(lexer->lookahead)) && mark_end) {
            lexer->mark_end(lexer);
        }
        *symbol_result = CUSTOM_OPERATOR;
        return true;
    }
    return false;
}
// Attempt to consume a (possibly nested) `/* ... */` block comment.
//
// Returns:
//   CONTINUE_PARSING_NOTHING_FOUND  - lookahead was not `/`; nothing consumed.
//   CONTINUE_PARSING_SLASH_CONSUMED - a lone `/` was consumed but no `*`
//                                     followed, so this is not a block comment.
//   STOP_PARSING_END_OF_FILE        - the comment was never terminated.
//   STOP_PARSING_TOKEN_FOUND        - a full comment was consumed and
//                                     *symbol_result is set to BLOCK_COMMENT.
static enum ParseDirective eat_comment(
    TSLexer *lexer,
    const bool *valid_symbols,
    bool mark_end,
    enum TokenType *symbol_result
) {
    if (lexer->lookahead != '/') {
        return CONTINUE_PARSING_NOTHING_FOUND;
    }
    advance(lexer);
    if (lexer->lookahead != '*') {
        return CONTINUE_PARSING_SLASH_CONSUMED;
    }
    advance(lexer);
    unsigned depth = 1;          // Swift block comments nest.
    bool prev_was_star = false;  // True when the previous character was `*`.
    while (true) {
        int32_t cur = lexer->lookahead;
        if (cur == '\0') {
            // Hit end of input while still inside the comment.
            return STOP_PARSING_END_OF_FILE;
        }
        advance(lexer);
        if (cur == '*') {
            prev_was_star = true;
            continue;
        }
        if (cur == '/' && prev_was_star) {
            // `*/` closes one nesting level.
            prev_was_star = false;
            if (--depth == 0) {
                if (mark_end) {
                    lexer->mark_end(lexer);
                }
                *symbol_result = BLOCK_COMMENT;
                return STOP_PARSING_TOKEN_FOUND;
            }
            continue;
        }
        if (cur == '/' && lexer->lookahead == '*') {
            // `/*` opens a nested comment; consume the `*` immediately so it
            // cannot also pair with a following `/` as a closer.
            depth += 1;
            advance(lexer);
        }
        prev_was_star = false;
    }
}
// Consume whitespace (and interleaved comments) at the current position and
// decide whether it amounts to a statement terminator.
//
// Returns:
//   STOP_PARSING_TOKEN_FOUND        - a semicolon token was produced;
//                                     *symbol_result is EXPLICIT_SEMI (a real
//                                     `;`) or IMPLICIT_SEMI (a meaningful
//                                     newline), or a block comment token when
//                                     one was found immediately after the
//                                     newline.
//   CONTINUE_PARSING_TOKEN_FOUND    - a newline was crossed and may still
//                                     become an IMPLICIT_SEMI; *symbol_result
//                                     is set but the caller keeps scanning.
//   CONTINUE_PARSING_NOTHING_FOUND  - only insignificant whitespace was seen.
//   CONTINUE_PARSING_SLASH_CONSUMED - a stray `/` was consumed along the way.
//   STOP_PARSING_NOTHING_FOUND /
//   STOP_PARSING_END_OF_FILE        - scanning should end with no token.
static enum ParseDirective eat_whitespace(
    TSLexer *lexer,
    const bool *valid_symbols,
    enum TokenType *symbol_result
) {
    enum ParseDirective ws_directive = CONTINUE_PARSING_NOTHING_FOUND;
    // NOTE(review): requires BOTH semicolon tokens to be valid before either
    // may be emitted -- presumably the grammar always makes them valid
    // together; confirm in grammar.js.
    bool semi_is_valid = valid_symbols[IMPLICIT_SEMI] && valid_symbols[EXPLICIT_SEMI];
    uint32_t lookahead;
    // Walk over everything the grammar treats as whitespace. `lookahead`
    // always holds the character most recently examined by the condition.
    while (should_treat_as_wspace(lookahead = lexer->lookahead)) {
        if (lookahead == ';') {
            if (semi_is_valid) {
                // An explicit `;` is itself the token; consume it and stop.
                ws_directive = STOP_PARSING_TOKEN_FOUND;
                lexer->advance(lexer, false);
            }
            break;
        }
        lexer->advance(lexer, true);
        lexer->mark_end(lexer);
        // The first newline crossed makes this run of whitespace a candidate
        // for an implicit semicolon.
        if (ws_directive == CONTINUE_PARSING_NOTHING_FOUND && (lookahead == '\n' || lookahead == '\r')) {
            ws_directive = CONTINUE_PARSING_TOKEN_FOUND;
        }
    }
    enum ParseDirective any_comment = CONTINUE_PARSING_NOTHING_FOUND;
    // A newline followed by `/` needs a closer look: comments and leading
    // operators on the next line can suppress the implicit semicolon.
    if (ws_directive == CONTINUE_PARSING_TOKEN_FOUND && lookahead == '/') {
        bool has_seen_single_comment = false;
        while (lexer->lookahead == '/') {
            // It's possible that this is a comment - start an exploratory mission to find out, and if it is, look for what
            // comes after it. We care about what comes after it for the purpose of suppressing the newline.
            enum TokenType multiline_comment_result;
            any_comment = eat_comment(lexer, valid_symbols, /* mark_end */ false, &multiline_comment_result);
            if (any_comment == STOP_PARSING_TOKEN_FOUND) {
                // This is a multiline comment. This scanner should be parsing those, so we might want to bail out and
                // emit it instead. However, we only want to do that if we haven't advanced through a _single_ line
                // comment on the way - otherwise that will get lumped into this.
                if (!has_seen_single_comment) {
                    lexer->mark_end(lexer);
                    *symbol_result = multiline_comment_result;
                    return STOP_PARSING_TOKEN_FOUND;
                }
            } else if (any_comment == STOP_PARSING_END_OF_FILE) {
                return STOP_PARSING_END_OF_FILE;
            } else if (any_comment == CONTINUE_PARSING_SLASH_CONSUMED) {
                // We accidentally ate a slash -- we should actually bail out, say we saw nothing, and let the next pass
                // take it from after the newline.
                return CONTINUE_PARSING_SLASH_CONSUMED;
            } else if (lexer->lookahead == '/') {
                // There wasn't a multiline comment, which we know means that the comment parser ate its `/` and then
                // bailed out. If it had seen anything comment-like after that first `/` it would have continued going
                // and eventually had a well-formed comment or an EOF. Thus, if we're currently looking at a `/`, it's
                // the second one of those and it means we have a single-line comment.
                has_seen_single_comment = true;
                while (lexer->lookahead != '\n' && lexer->lookahead != '\0') {
                    lexer->advance(lexer, true);
                }
            } else if (iswspace(lexer->lookahead)) {
                // We didn't see any type of comment - in fact, we saw an operator that we don't normally treat as an
                // operator. Still, this is a reason to stop parsing.
                return STOP_PARSING_NOTHING_FOUND;
            }
            // If we skipped through some comment, we're at whitespace now, so advance.
            while(iswspace(lexer->lookahead)) {
                any_comment = CONTINUE_PARSING_NOTHING_FOUND; // We're advancing, so clear out the comment
                lexer->advance(lexer, true);
            }
        }
        // After the comments, check whether the next line begins with an
        // operator: a leading operator continues the previous expression and
        // therefore suppresses the implicit semicolon.
        enum TokenType operator_result;
        bool saw_operator = eat_operators(
            lexer,
            valid_symbols,
            /* mark_end */ false,
            '\0',
            &operator_result
        );
        if (saw_operator) {
            // The operator we saw should suppress the newline, so bail out.
            return STOP_PARSING_NOTHING_FOUND;
        } else {
            // Promote the implicit newline to an explicit one so we don't check for operators again.
            *symbol_result = IMPLICIT_SEMI;
            ws_directive = STOP_PARSING_TOKEN_FOUND;
        }
    }
    // Let's consume operators that can live after a "semicolon" style newline. Before we do that, though, we want to
    // check for a set of characters that we do not consume, but that still suppress the semi.
    if (ws_directive == CONTINUE_PARSING_TOKEN_FOUND) {
        // NOTE(review): NON_CONSUMING_CROSS_SEMI_CHARS is defined elsewhere in
        // this file; each listed character cancels the implicit semicolon
        // without being consumed here.
        for (int i = 0; i < NON_CONSUMING_CROSS_SEMI_CHAR_COUNT; i++) {
            if (NON_CONSUMING_CROSS_SEMI_CHARS[i] == lookahead) {
                return CONTINUE_PARSING_NOTHING_FOUND;
            }
        }
    }
    if (semi_is_valid && ws_directive != CONTINUE_PARSING_NOTHING_FOUND) {
        *symbol_result = lookahead == ';' ? EXPLICIT_SEMI : IMPLICIT_SEMI;
        return ws_directive;
    }
    return CONTINUE_PARSING_NOTHING_FOUND;
}
#define DIRECTIVE_COUNT 4

// Compiler-control keywords that can follow `#` (as in `#if` / `#endif`).
// Fixed: this array was declared with OPERATOR_COUNT entries -- a copy/paste
// slip from the operator tables above. It holds exactly DIRECTIVE_COUNT
// strings, so size it with the matching constant; loops over it already use
// DIRECTIVE_COUNT, so nothing else changes.
const char* DIRECTIVES[DIRECTIVE_COUNT] = {
    "if",
    "elseif",
    "else",
    "endif"
};

// Token to emit for each DIRECTIVES entry, index-aligned with the above.
const enum TokenType DIRECTIVE_SYMBOLS[DIRECTIVE_COUNT] = {
    DIRECTIVE_IF,
    DIRECTIVE_ELSEIF,
    DIRECTIVE_ELSE,
    DIRECTIVE_ENDIF
};
// Read a compiler-control directive keyword (`if`, `elseif`, `else`,
// `endif`) at the current position; the caller has already consumed the
// introducing `#`.
//
// Longest match wins (`elseif` beats `else`); whenever a directive matches
// completely, the token end is committed at the end of that keyword. When
// nothing matches, the token is just the bare `#` (HASH_SYMBOL) and the
// token end stays wherever the caller marked it.
static enum TokenType find_possible_compiler_directive(TSLexer *lexer) {
    bool possible_directives[DIRECTIVE_COUNT];
    for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx++) {
        possible_directives[dir_idx] = true;
    }
    int32_t str_idx = 0;
    int32_t full_match = -1;
    while (true) {
        uint8_t match_count = 0;
        for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx++) {
            if (!possible_directives[dir_idx]) {
                continue;
            }
            uint8_t expected_char = DIRECTIVES[dir_idx][str_idx];
            if (expected_char == '\0') {
                // Complete keyword: remember it (a longer directive may still
                // overwrite this later) and commit the token end here.
                full_match = dir_idx;
                lexer->mark_end(lexer);
                // Fixed: retire a finished keyword immediately. Previously,
                // when the lookahead was '\0' (end of file right after e.g.
                // `#if`), `expected_char != lookahead` was false, the entry
                // stayed alive, and the next iteration indexed past the
                // string literal's terminator -- an out-of-bounds read and a
                // loop that never terminated. For any other lookahead the
                // entry was eliminated on this same pass anyway, so behavior
                // is otherwise unchanged.
                possible_directives[dir_idx] = false;
                continue;
            }
            if (expected_char != lexer->lookahead) {
                possible_directives[dir_idx] = false;
                continue;
            }
            // Still matching after this character.
            match_count += 1;
        }
        if (match_count == 0) {
            break;
        }
        lexer->advance(lexer, false);
        str_idx += 1;
    }
    if (full_match == -1) {
        // No compiler directive found, so just match the starting symbol
        return HASH_SYMBOL;
    }
    return DIRECTIVE_SYMBOLS[full_match];
}
// Scan one piece of a raw string literal (`#"..."#`, `##"..."##`, ...).
//
// Two ways in:
//   - state->ongoing_raw_str_hash_count == 0: we may be at the *start* of a
//     raw string -- count the leading `#`s and require an opening `"`. A
//     single `#` not followed by `"` is instead resolved as a compiler
//     directive or a bare `#` token.
//   - a raw string is already open and RAW_STR_CONTINUING_INDICATOR says the
//     grammar just closed an interpolation, so we resume inside the string.
//
// Emits RAW_STR_PART (stopping just before an interpolation `\#...(`), or
// RAW_STR_END_PART (consuming through the closing `"#...#`), and records the
// delimiter's hash count in `state` so the next call knows whether a raw
// string is still open.
static bool eat_raw_str_part(
    struct ScannerState *state,
    TSLexer *lexer,
    const bool *valid_symbols,
    enum TokenType *symbol_result
) {
    uint32_t hash_count = state->ongoing_raw_str_hash_count;
    if (!valid_symbols[RAW_STR_PART]) {
        return false;
    } else if (hash_count == 0) {
        // If this is a raw_str_part, it's the first one - look for hashes
        while (lexer->lookahead == '#') {
            hash_count += 1;
            advance(lexer);
        }
        if (hash_count == 0) {
            return false;
        }
        if (lexer->lookahead == '"') {
            advance(lexer);
        } else if (hash_count == 1) {
            // A lone `#` with no string after it: it may introduce a
            // compiler directive such as `#if`.
            lexer->mark_end(lexer);
            *symbol_result = find_possible_compiler_directive(lexer);
            return true;
        } else {
            return false;
        }
    } else if (valid_symbols[RAW_STR_CONTINUING_INDICATOR]) {
        // This is the end of an interpolation - now it's another raw_str_part. This is a synthetic
        // marker to tell us that the grammar just consumed a `(` symbol to close a raw
        // interpolation (since we don't want to fire on every `(` in existence). We don't have
        // anything to do except continue.
    } else {
        return false;
    }
    // We're in a state where anything other than `hash_count` hash symbols in a row should be eaten
    // and is part of a string.
    // The last character _before_ the hashes will tell us what happens next.
    // Matters are also complicated by the fact that we don't want to consume every character we
    // visit; if we see a `\#(`, for instance, with the appropriate number of hash symbols, we want
    // to end our parsing _before_ that sequence. This allows highlighting tools to treat that as a
    // separate token.
    while (lexer->lookahead != '\0') {
        // NOTE(review): uint8_t truncates multi-byte code points; that looks
        // safe because last_char is only ever compared against ASCII `\` and
        // `"` below -- confirm no multi-byte char can truncate to those.
        uint8_t last_char = '\0';
        lexer->mark_end(lexer); // We always want to parse thru the start of the string so far
        // Advance through anything that isn't a hash symbol, because we want to count those.
        while (lexer->lookahead != '#' && lexer->lookahead != '\0') {
            last_char = lexer->lookahead;
            advance(lexer);
            if (last_char != '\\' || lexer->lookahead == '\\') {
                // Mark a new end, but only if we didn't just advance past a `\` symbol, since we
                // don't want to consume that. Exception: if this is a `\` that happens _right
                // after_ another `\`, we for some reason _do_ want to consume that, because
                // apparently that is parsed as a literal `\` followed by something escaped.
                lexer->mark_end(lexer);
            }
        }
        // We hit at least one hash - count them and see if they match.
        uint32_t current_hash_count = 0;
        while (lexer->lookahead == '#' && current_hash_count < hash_count) {
            current_hash_count += 1;
            advance(lexer);
        }
        // If we saw exactly the right number of hashes, one of three things is true:
        // 1. We're trying to interpolate into this string.
        // 2. The string just ended.
        // 3. This was just some hash characters doing nothing important.
        if (current_hash_count == hash_count) {
            if (last_char == '\\' && lexer->lookahead == '(') {
                // Interpolation case! Don't consume those chars; they get saved for grammar.js.
                *symbol_result = RAW_STR_PART;
                state->ongoing_raw_str_hash_count = hash_count;
                return true;
            } else if (last_char == '"') {
                // The string is finished! Mark the end here, on the very last hash symbol.
                lexer->mark_end(lexer);
                *symbol_result = RAW_STR_END_PART;
                state->ongoing_raw_str_hash_count = 0;
                return true;
            }
            // Nothing special happened - let the string continue.
        }
    }
    // Ran into end of input while still inside the raw string.
    return false;
}
// Main entry point for the external scanner.
//
// Tries, in order: whitespace / explicit-or-implicit semicolons, block
// comments, operators (which may suppress a pending implicit semicolon),
// and finally raw-string parts / `#` compiler directives. On success the
// recognized token is stored in lexer->result_symbol and true is returned;
// returning false hands control back to the generated lexer.
bool tree_sitter_swift_external_scanner_scan(
    void *payload,
    TSLexer *lexer,
    const bool *valid_symbols
) {
    // Figure out our scanner state
    struct ScannerState *state = (struct ScannerState *)payload;
    // Consume any whitespace at the start.
    enum TokenType ws_result;
    enum ParseDirective ws_directive = eat_whitespace(lexer, valid_symbols, &ws_result);
    if (ws_directive == STOP_PARSING_TOKEN_FOUND) {
        lexer->result_symbol = ws_result;
        return true;
    }
    if (ws_directive == STOP_PARSING_NOTHING_FOUND || ws_directive == STOP_PARSING_END_OF_FILE) {
        return false;
    }
    // True when a newline produced a pending implicit semicolon that the
    // later stages may still suppress.
    bool has_ws_result = (ws_directive == CONTINUE_PARSING_TOKEN_FOUND);
    // Now consume comments (before custom operators so that those aren't treated as comments)
    enum TokenType comment_result;
    enum ParseDirective comment = ws_directive == CONTINUE_PARSING_SLASH_CONSUMED ? ws_directive : eat_comment(lexer, valid_symbols, /* mark_end */ true, &comment_result);
    if (comment == STOP_PARSING_TOKEN_FOUND) {
        lexer->mark_end(lexer);
        lexer->result_symbol = comment_result;
        return true;
    }
    if (comment == STOP_PARSING_END_OF_FILE) {
        return false;
    }
    // Now consume any operators that might cause our whitespace to be suppressed.
    // If a stray `/` was already consumed upstream, hand it to the operator
    // matcher as the prior character so it is not lost.
    enum TokenType operator_result;
    bool saw_operator = eat_operators(
        lexer,
        valid_symbols,
        /* mark_end */ !has_ws_result,
        comment == CONTINUE_PARSING_SLASH_CONSUMED ? '/' : '\0',
        &operator_result
    );
    // An operator beats a pending implicit semicolon only when there is no
    // pending semicolon, or the token is allowed to cross one
    // (is_cross_semi_token is defined earlier in this file).
    if (saw_operator && (!has_ws_result || is_cross_semi_token(operator_result))) {
        lexer->result_symbol = operator_result;
        if (has_ws_result) lexer->mark_end(lexer);
        return true;
    }
    if (has_ws_result) {
        // Don't `mark_end`, since we may have advanced through some operators.
        lexer->result_symbol = ws_result;
        return true;
    }
    // NOTE: this will consume any `#` characters it sees, even if it does not find a result. Keep
    // it at the end so that it doesn't interfere with special literals or selectors!
    enum TokenType raw_str_result;
    bool saw_raw_str_part = eat_raw_str_part(state, lexer, valid_symbols, &raw_str_result);
    if (saw_raw_str_part) {
        lexer->result_symbol = raw_str_result;
        return true;
    }
    return false;
}

View File

@@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Allow clients to override allocation functions
// When TREE_SITTER_REUSE_ALLOCATOR is defined, every ts_* macro routes
// through function pointers supplied by the embedding application;
// otherwise the macros fall straight through to the C standard library.
#ifdef TREE_SITTER_REUSE_ALLOCATOR

extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);

// Each name may also be pre-defined individually by the build system.
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif

#else

// Default: allocate directly with the C standard library.
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif

#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ALLOC_H_

View File

@@ -0,0 +1,290 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./alloc.h"

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// The helper macros in this header declare scratch locals that are not
// always read; silence the resulting "unused variable" warnings here and
// restore the settings at the bottom of the file.
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
/// Declares an anonymous struct type holding a growable contiguous array
/// of `T` elements, with size/capacity bookkeeping.
#define Array(T)       \
  struct {             \
    T *contents;       \
    uint32_t size;     \
    uint32_t capacity; \
  }

/// Initialize an array.
#define array_init(self) \
  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)

/// Create an empty array.
#define array_new() \
  { NULL, 0, 0 }

/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])

/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)

/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)

/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)

/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)

/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))

/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
  do { \
    if ((count) == 0) break; \
    _array__grow((Array *)(self), count, array_elem_size(self)); \
    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
    (self)->size += (count); \
  } while (0)

/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
  array_extend((self), (other)->size, (other)->contents)

/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), (self)->size, \
    0, count, contents \
  )

/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), _index, \
    old_count, new_count, new_contents \
  )

/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))

/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
  _array__erase((Array *)(self), array_elem_size(self), _index)

/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])

/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))

/// Swap one array with another
#define array_swap(self, other) \
  _array__swap((Array *)(self), (Array *)(other))

/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)

/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  _array__search_sorted(self, 0, compare, , needle, _index, _exists)

/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)

/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)
// Private

// Type-erased view of any Array(T); the public macros cast to this before
// calling the helpers below.
typedef Array(void) Array;

/// This is not what you're looking for, see `array_delete`.
///
/// Frees the backing buffer (if any) and resets the array to empty; the
/// `Array` struct itself is not freed.
static inline void _array__delete(Array *self) {
  if (self->contents) {
    ts_free(self->contents);
    self->contents = NULL;
    self->size = 0;
    self->capacity = 0;
  }
}
/// This is not what you're looking for, see `array_erase`.
///
/// Removes the element at `index`, shifting the tail of the array left by
/// one slot. Capacity is left untouched.
static inline void _array__erase(Array *self, size_t element_size,
                                 uint32_t index) {
  assert(index < self->size);
  char *base = (char *)self->contents;
  char *hole = base + index * element_size;
  // The source and destination overlap, so memmove is required here.
  memmove(hole, hole + element_size, (self->size - index - 1) * element_size);
  self->size -= 1;
}
/// This is not what you're looking for, see `array_reserve`.
///
/// Grows the backing buffer to hold at least `new_capacity` elements;
/// never shrinks.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) {
    return;
  }
  // Use ts_malloc for a brand-new buffer and ts_realloc otherwise, so a
  // custom allocator sees the conventional call for each situation.
  self->contents = self->contents
    ? ts_realloc(self->contents, new_capacity * element_size)
    : ts_malloc(new_capacity * element_size);
  self->capacity = new_capacity;
}
/// This is not what you're looking for, see `array_assign`.
///
/// Copies `other`'s elements into `self`, growing `self`'s buffer first
/// when needed.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  uint32_t count = other->size;
  _array__reserve(self, element_size, count);
  memcpy(self->contents, other->contents, count * element_size);
  self->size = count;
}
/// This is not what you're looking for, see `array_swap`.
///
/// Exchanges the complete state (buffer, size, capacity) of two arrays.
static inline void _array__swap(Array *self, Array *other) {
  Array tmp = *self;
  *self = *other;
  *other = tmp;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
///
/// Ensures room for `count` additional elements, doubling the capacity when
/// more space is needed (with a floor of 8 slots).
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t required = self->size + count;
  if (required <= self->capacity) {
    return;
  }
  uint32_t doubled = self->capacity * 2;
  if (doubled < 8) {
    doubled = 8;
  }
  _array__reserve(self, element_size, doubled > required ? doubled : required);
}
/// This is not what you're looking for, see `array_splice`.
///
/// Replaces `old_count` elements at `index` with `new_count` elements read
/// from `elements`; when `elements` is NULL the inserted slots are
/// zero-filled instead.
static inline void _array__splice(Array *self, size_t element_size,
                                  uint32_t index, uint32_t old_count,
                                  uint32_t new_count, const void *elements) {
  uint32_t new_size = self->size + new_count - old_count;
  uint32_t old_end = index + old_count;
  uint32_t new_end = index + new_count;
  assert(old_end <= self->size);

  _array__reserve(self, element_size, new_size);

  char *contents = (char *)self->contents;
  // Shift the tail (everything after the replaced span) into position; the
  // regions may overlap, hence memmove.
  if (self->size > old_end) {
    memmove(
      contents + new_end * element_size,
      contents + old_end * element_size,
      (self->size - old_end) * element_size
    );
  }
  if (new_count > 0) {
    if (elements) {
      memcpy(
        (contents + index * element_size),
        elements,
        new_count * element_size
      );
    } else {
      memset(
        (contents + index * element_size),
        0,
        new_count * element_size
      );
    }
  }
  self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// On exit, `*_index` is the position of the matching element (when
/// `*_exists` is true) or the insertion point that keeps the array sorted.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)

/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
/// NOTE(review): the subtraction assumes both values fit comfortably in `int`
/// (extreme magnitudes would overflow) -- confirm for any new call site.
#define _compare_int(a, b) ((int)*(a) - (int)(b))

// Restore the warning settings changed at the top of this header.
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ARRAY_H_

View File

@@ -0,0 +1,266 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Fixed size of the buffer passed to external-scanner serialize callbacks.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

// These are normally provided by the public tree-sitter API header; declare
// them here only when a grammar is compiled standalone.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif

typedef struct {
  TSFieldId field_id;
  uint8_t child_index;
  bool inherited;
} TSFieldMapEntry;

typedef struct {
  uint16_t index;
  uint16_t length;
} TSFieldMapSlice;

typedef struct {
  bool visible;
  bool named;
  bool supertype;
} TSSymbolMetadata;

typedef struct TSLexer TSLexer;

// Interface the runtime hands to lexing code, including external scanners
// such as this grammar's scanner.c.
struct TSLexer {
  // Next code point to be consumed; scanner code treats '\0' as end of input.
  int32_t lookahead;
  TSSymbol result_symbol;
  // Consume the lookahead; the bool marks the character as skipped
  // (excluded from the token) when true.
  void (*advance)(TSLexer *, bool);
  // Commit the current position as the token's end.
  void (*mark_end)(TSLexer *);
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
  void (*log)(const TSLexer *, const char *, ...);
};

typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;

typedef union {
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  uint8_t type;
} TSParseAction;

typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;

typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;

// Inclusive range of code points, used by generated character-set tables
// (see `set_contains` below).
typedef struct {
  int32_t start;
  int32_t end;
} TSCharacterRange;
// Complete description of a generated language: symbol and field metadata,
// the parse/lex tables, and the external-scanner hooks. Instances are
// emitted by `tree-sitter generate` and read directly by the runtime, so
// this layout must not be edited by hand.
struct TSLanguage {
  uint32_t version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;
  const TSFieldMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  // Callbacks implemented in scanner.c for tokens the generated lexer
  // cannot handle on its own.
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
};
// Binary-search a sorted table of inclusive code-point ranges for
// `lookahead`; returns true when some range contains it.
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  uint32_t lo = 0;
  uint32_t remaining = len;
  while (remaining > 1) {
    uint32_t half = remaining / 2;
    uint32_t mid = lo + half;
    TSCharacterRange *candidate = &ranges[mid];
    if (lookahead >= candidate->start && lookahead <= candidate->end) {
      return true;
    }
    // Keep the lower half only when the probe falls below the candidate;
    // otherwise shift the window up to the midpoint.
    if (lookahead > candidate->end) {
      lo = mid;
    }
    remaining -= half;
  }
  TSCharacterRange *last = &ranges[lo];
  return lookahead >= last->start && lookahead <= last->end;
}
/*
 * Lexer Macros
 */

#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif

// Declares the locals and the `start`/`next_state` labels that the
// ADVANCE/SKIP/ACCEPT_TOKEN macros below jump between.
#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  UNUSED                        \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;

#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }

// Table-driven variant of ADVANCE: pairs of (lookahead, next state).
#define ADVANCE_MAP(...)                                              \
  {                                                                   \
    static const uint16_t map[] = { __VA_ARGS__ };                    \
    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
      if (map[i] == lookahead) {                                      \
        state = map[i + 1];                                           \
        goto next_state;                                              \
      }                                                               \
    }                                                                 \
  }

#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }

#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

#define END_STATE() return result;

/*
 * Parse Table Macros
 */

#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

#define STATE(id) id

#define ACTIONS(id) id

#define SHIFT(state_value)            \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = (state_value)          \
    }                                 \
  }}

#define SHIFT_REPEAT(state_value)     \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = (state_value),         \
      .repetition = true              \
    }                                 \
  }}

#define SHIFT_EXTRA()                 \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .extra = true                   \
    }                                 \
  }}

#define REDUCE(symbol_name, children, precedence, prod_id) \
  {{                                                       \
    .reduce = {                                            \
      .type = TSParseActionTypeReduce,                     \
      .symbol = symbol_name,                               \
      .child_count = children,                             \
      .dynamic_precedence = precedence,                    \
      .production_id = prod_id                             \
    },                                                     \
  }}

#define RECOVER()                    \
  {{                                 \
    .type = TSParseActionTypeRecover \
  }}

#define ACCEPT_INPUT()              \
  {{                                \
    .type = TSParseActionTypeAccept \
  }}

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_PARSER_H_

View File

@@ -0,0 +1,39 @@
{
"grammars": [
{
"name": "swift",
"camelcase": "Swift",
"scope": "source.swift",
"path": ".",
"file-types": [
"swift"
],
"highlights": "queries/highlights.scm",
"injections": "queries/injections.scm",
"locals": "queries/locals.scm",
"injection-regex": "swift"
}
],
"metadata": {
"version": "0.7.2",
"license": "MIT",
"description": "A tree-sitter grammar for the Swift programming language.",
"authors": [
{
"name": "Alex Pinkus",
"email": "alex.pinkus@gmail.com"
}
],
"links": {
"repository": "git+https://github.com/alex-pinkus/tree-sitter-swift.git"
}
},
"bindings": {
"c": true,
"go": true,
"node": true,
"python": true,
"rust": true,
"swift": true
}
}