unified: vendor in tree-sitter-swift

This commit is contained in:
Taus
2026-05-08 13:41:14 +00:00
parent 36554d160c
commit 9f6bd88171
27 changed files with 599675 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
load("@rules_rust//cargo:defs.bzl", "cargo_build_script")
load("@rules_rust//rust:defs.bzl", "rust_library")
load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps")

package(default_visibility = ["//visibility:public"])

# This will run the build script from the root of the workspace, and
# collect the outputs.
cargo_build_script(
    name = "tree-sitter-swift-build",
    srcs = ["bindings/rust/build.rs"],
    # The build script compiles the generated C parser and the hand-written
    # external scanner under src/, so both must be visible at build time.
    data = glob([
        "src/**",
    ]),
    deps = all_crate_deps(
        build = True,
    ),
)

# The Rust bindings crate. compile_data lists everything the crate pulls in
# via include_str! (node-types.json, the queries) plus the grammar itself.
rust_library(
    name = "tree-sitter-swift",
    srcs = [
        "bindings/rust/lib.rs",
    ],
    aliases = aliases(),
    compile_data = glob([
        "src/**",
        "queries/**",
    ]) + [
        "grammar.js",
    ],
    proc_macro_deps = all_crate_deps(
        proc_macro = True,
    ),
    deps = [":tree-sitter-swift-build"] + all_crate_deps(
        normal = True,
    ),
)

exports_files(["Cargo.toml"])

View File

@@ -0,0 +1,21 @@
[package]
name = "tree-sitter-swift"
description = "Swift grammar for the tree-sitter parsing library (vendored copy for the unified extractor)"
version = "0.7.2"
keywords = ["incremental", "parsing", "swift"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/alex-pinkus/tree-sitter-swift"
edition = "2024"
license = "MIT"
# Compiles src/parser.c and src/scanner.c via the `cc` crate (see build-dependencies).
build = "bindings/rust/build.rs"

[lib]
path = "bindings/rust/lib.rs"

# When updating these dependencies, run `misc/bazel/3rdparty/update_cargo_deps.sh`
[dependencies]
tree-sitter-language = "0.1"

[build-dependencies]
cc = "1.2"

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2021 alex-pinkus
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,127 @@
![Parse rate badge](https://byob.yarr.is/alex-pinkus/tree-sitter-swift/parse_rate)
[![Crates.io badge](https://byob.yarr.is/alex-pinkus/tree-sitter-swift/crates_io_version)](https://crates.io/crates/tree-sitter-swift)
[![NPM badge](https://byob.yarr.is/alex-pinkus/tree-sitter-swift/npm_version)](https://www.npmjs.com/package/tree-sitter-swift)
[![Build](https://github.com/alex-pinkus/tree-sitter-swift/actions/workflows/top-repos.yml/badge.svg)](https://github.com/alex-pinkus/tree-sitter-swift/actions/workflows/top-repos.yml)
# tree-sitter-swift
This contains a [`tree-sitter`](https://tree-sitter.github.io/tree-sitter) grammar for the Swift programming language.
## Getting started
To use this parser to parse Swift code, you'll want to depend on either the Rust crate or the NPM package.
### Rust
To use the Rust crate, you'll add this to your `Cargo.toml`:
```
tree-sitter = "0.23.0"
tree-sitter-swift = "=0.7.0"
```
Then you can use a `tree-sitter` parser with the language declared here:
```
let mut parser = tree_sitter::Parser::new();
parser.set_language(tree_sitter_swift::language())?;
// ...
let tree = parser.parse(&my_source_code, None)
.ok_or_else(|| /* error handling code */)?;
```
### Javascript
To use this from NPM, you'll add similar dependencies to `package.json`:
```
"dependencies": {
"tree-sitter-swift": "0.7.0",
"tree-sitter": "^0.22.1"
}
```
Your usage of the parser will look like:
```
const Parser = require("tree-sitter");
const Swift = require("tree-sitter-swift");
const parser = new Parser();
parser.setLanguage(Swift);
// ...
const tree = parser.parse(mySourceCode);
```
### Editing the grammar
With this package checked out, a common workflow for editing the grammar will look something like:
1. Make a change to `grammar.ts`.
2. Run `npm install && npm test` to see whether the change has had impact on existing parsing behavior. The default
`npm test` target requires `valgrind` to be installed; if you do not have it installed, and do not wish to, you can
substitute `tree-sitter test` directly.
3. Run `tree-sitter parse` on some real Swift codebase and see whether (or where) it fails.
4. Use any failures to create new corpus test cases.
## Contributions
All contributions to this repository are welcome.
If said contribution is to check generated files (e.g., `parser.c`) into the repository, be aware that your contribution will not be accepted. Make sure to read the [FAQ entry](https://github.com/alex-pinkus/tree-sitter-swift?tab=readme-ov-file#where-is-your-parserc) and the [prior](https://github.com/alex-pinkus/tree-sitter-swift/issues/362) [discussions](https://github.com/alex-pinkus/tree-sitter-swift/pull/315) and [compromises](https://github.com/alex-pinkus/tree-sitter-swift/issues/149) that have occurred already on this topic.
## Using tree-sitter-swift in Web Assembly
To use tree-sitter-swift with the WebAssembly (web) bindings for tree-sitter — which are likely to be more
up to date than the published node module (see the [web bindings README](https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/README.md)) — follow the instructions below:
1. Install the node modules `npm install web-tree-sitter tree-sitter-swift`
2. Run the tree-sitter cli to create the wasm bundle
```sh
$ npx tree-sitter build --wasm ./node_modules/tree-sitter-swift
```
3. Boot tree-sitter wasm like this.
```js
const Parser = require("web-tree-sitter");
async function run() {
//needs to happen first
await Parser.init();
//wait for the load of swift
const Swift = await Parser.Language.load("./tree-sitter-swift.wasm");
const parser = new Parser();
parser.setLanguage(Swift);
//Parse your swift code here.
const tree = parser.parse('print("Hello, World!")');
}
//if you want to run this
run().then(console.log, console.error);
```
## Frequently asked questions
### Where is your `parser.c`?
This repository currently omits most of the code that is autogenerated during a build. This means, for instance, that
`grammar.json` and `parser.c` are both only available following a build. It also significantly reduces noise during
diffs.
The side benefit of not checking in `parser.c` is that you can guarantee backwards compatibility. Parsers generated by
the tree-sitter CLI aren't always backwards compatible. If you need a parser, generate it yourself using the CLI; all
the information to do so is available in this package. By doing that, you'll also know for sure that your parser version
and your library version are compatible.
If you need a `parser.c`, and you don't care about the tree-sitter version, but you don't have a local setup that would
allow you to obtain the parser, you can just download one from a recent workflow run in this package. To do so:
- Go to the [GitHub actions page](https://github.com/alex-pinkus/tree-sitter-swift/actions) for this
repository.
- Click on the "Publish `grammar.json` and `parser.c`" action for the appropriate commit.
- Go down to `Artifacts` and click on `generated-parser-src`. All the relevant parser files will be available in your
download.

View File

@@ -0,0 +1,44 @@
{
"targets": [
{
"target_name": "tree_sitter_swift_binding",
"dependencies": [
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
],
"include_dirs": [
"src",
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
# NOTE: if your language has an external scanner, add it here.
"src/scanner.c",
],
"cflags_c": [
"-std=c11",
],
"actions": [
{
"action_name": "wait_for_tree_sitter",
"action": ["node", "scripts/wait-for-tree-sitter.js"],
"inputs": [],
"outputs": ["node_modules/tree-sitter-cli"]
},
{
"action_name": "generate_header_files",
"inputs": [
"grammar.js",
"node_modules/tree-sitter-cli"
],
"outputs": [
"src/grammar.json",
"src/node-types.json",
"src/parser.c",
"src/tree_sitter",
],
"action": ["tree-sitter", "generate", "--no-bindings"],
}
]
}
]
}

View File

@@ -0,0 +1,20 @@
#include <napi.h>

typedef struct TSLanguage TSLanguage;

// Implemented by the generated parser (src/parser.c).
extern "C" TSLanguage *tree_sitter_swift();

// "tree-sitter", "language" hashed with BLAKE2
// Tagging the external lets consumers verify it really wraps a TSLanguage.
const napi_type_tag LANGUAGE_TYPE_TAG = {
  0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};

// Module entry point: exposes the grammar name and the tagged language pointer.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
  exports["name"] = Napi::String::New(env, "swift");
  auto language = Napi::External<TSLanguage>::New(env, tree_sitter_swift());
  language.TypeTag(&LANGUAGE_TYPE_TAG);
  exports["language"] = language;
  return exports;
}

NODE_API_MODULE(tree_sitter_swift_binding, Init)

View File

@@ -0,0 +1,7 @@
// Load the prebuilt (or locally compiled) native binding from the package root.
const root = require("path").join(__dirname, "..", "..");
module.exports = require("node-gyp-build")(root);

// node-types.json only exists after the parser has been generated, so its
// absence is deliberately tolerated and nodeTypeInfo is left undefined.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

View File

@@ -0,0 +1,19 @@
/// Build script: compiles the generated C parser and the hand-written
/// external scanner into the static library linked by the Rust bindings.
fn main() {
    let source_dir = std::path::Path::new("src");

    let mut build = cc::Build::new();
    build.std("c11").include(source_dir);

    // MSVC needs an explicit switch to read the sources as UTF-8.
    #[cfg(target_env = "msvc")]
    build.flag("-utf-8");

    // Both C translation units are handled identically: register the file
    // with the compiler and ask cargo to rebuild when it changes.
    for file_name in ["parser.c", "scanner.c"] {
        let path = source_dir.join(file_name);
        build.file(&path);
        println!("cargo:rerun-if-changed={}", path.to_str().unwrap());
    }

    build.compile("tree-sitter-swift");
}

View File

@@ -0,0 +1,68 @@
//! This crate provides Swift language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = r#"
//! "#;
//! let mut parser = tree_sitter::Parser::new();
//! let language = tree_sitter_swift::LANGUAGE;
//! parser
//!     .set_language(&language.into())
//!     .expect("Error loading Swift parser");
//! let tree = parser.parse(code, None).unwrap();
//! assert!(!tree.root_node().has_error());
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/

use tree_sitter_language::LanguageFn;

unsafe extern "C" {
    // Provided by the generated C parser compiled by bindings/rust/build.rs.
    fn tree_sitter_swift() -> *const ();
}

/// The tree-sitter [`LanguageFn`] for this grammar.
pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_swift) };

/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");

/// Syntax-highlighting queries for this grammar.
pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");

/// Language-injection queries (regex literals, comments) for this grammar.
pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");

/// Local variable/scope queries for this grammar.
pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");

/// Code-navigation ("tags") queries for this grammar.
pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");

#[cfg(test)]
mod tests {
    // Smoke test: the generated language can be loaded by a parser at all.
    #[test]
    fn test_can_load_grammar() {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&super::LANGUAGE.into())
            .expect("Error loading Swift parser");
    }

    // Smoke test: a trivial assignment parses to the expected tree shape.
    #[test]
    fn test_can_parse_basic_file() {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&super::LANGUAGE.into())
            .expect("Error loading Swift parser");
        let tree = parser
            .parse("_ = \"Hello!\"\n", None)
            .expect("Unable to parse!");
        assert_eq!(
            "(source_file (assignment target: (directly_assignable_expression (simple_identifier)) result: (line_string_literal text: (line_str_text))))",
            tree.root_node().to_sexp(),
        );
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,68 @@
{
"name": "tree-sitter-swift",
"version": "0.7.2",
"description": "A tree-sitter grammar for the Swift programming language.",
"main": "bindings/node/index.js",
"types": "bindings/node",
"scripts": {
"install": "node-gyp-build",
"prestart": "tree-sitter build --wasm",
"start": "tree-sitter playground",
"test": "node --test bindings/node/*_test.js"
},
"repository": {
"type": "git",
"url": "git+https://github.com/alex-pinkus/tree-sitter-swift.git"
},
"tree-sitter": [
{
"scope": "source.swift",
"file-types": [
"swift"
],
"injection-regex": "swift",
"highlights": "queries/highlights.scm",
"locals": "queries/locals.scm",
"injections": "queries/injections.scm"
}
],
"keywords": [
"parser",
"swift"
],
"files": [
"grammar.js",
"tree-sitter.json",
"binding.gyp",
"prebuilds/**",
"bindings/node/*",
"queries/*",
"scripts/*",
"src/**"
],
"author": "Alex Pinkus <alex.pinkus@gmail.com>",
"license": "MIT",
"bugs": {
"url": "https://github.com/alex-pinkus/tree-sitter-swift/issues"
},
"homepage": "https://github.com/alex-pinkus/tree-sitter-swift#readme",
"dependencies": {
"node-addon-api": "^8.0.0",
"node-gyp-build": "^4.8.0",
"tree-sitter-cli": "^0.23",
"which": "2.0.2"
},
"peerDependencies": {
"tree-sitter": "^0.22.1"
},
"peerDependenciesMeta": {
"tree-sitter": {
"optional": true
}
},
"devDependencies": {
"node-gyp": "^10.0.1",
"prebuildify": "^6.0.0",
"prettier": "2.3.2"
}
}

View File

@@ -0,0 +1,35 @@
; format-ignore
[
(protocol_body) ; protocol Foo { ... }
(class_body) ; class Foo { ... }
(enum_class_body) ; enum Foo { ... }
(function_body) ; func Foo (...) {...}
(computed_property) ; { ... }
(computed_getter) ; get { ... }
(computed_setter) ; set { ... }
(do_statement)
(if_statement)
(for_statement)
(switch_statement)
(while_statement)
(guard_statement)
(switch_entry)
(type_parameters) ; x<Foo>
(tuple_type) ; (...)
(array_type) ; [String]
(dictionary_type) ; [Foo: Bar]
(call_expression) ; callFunc(...)
(tuple_expression) ; ( foo + bar )
(array_literal) ; [ foo, bar ]
(dictionary_literal) ; [ foo: bar, x: y ]
(lambda_literal)
(willset_didset_block)
(willset_clause)
(didset_clause)
(import_declaration)+
] @fold

View File

@@ -0,0 +1,336 @@
[
"."
";"
":"
","
] @punctuation.delimiter
[
"("
")"
"["
"]"
"{"
"}"
] @punctuation.bracket
; Identifiers
(type_identifier) @type
[
(self_expression)
(super_expression)
] @variable.builtin
; Declarations
[
"func"
"deinit"
] @keyword.function
[
(visibility_modifier)
(member_modifier)
(function_modifier)
(property_modifier)
(parameter_modifier)
(inheritance_modifier)
(mutation_modifier)
] @keyword.modifier
(simple_identifier) @variable
(function_declaration
(simple_identifier) @function.method)
(protocol_function_declaration
name: (simple_identifier) @function.method)
(init_declaration
"init" @constructor)
(parameter
external_name: (simple_identifier) @variable.parameter)
(parameter
name: (simple_identifier) @variable.parameter)
(type_parameter
(type_identifier) @variable.parameter)
(inheritance_constraint
(identifier
(simple_identifier) @variable.parameter))
(equality_constraint
(identifier
(simple_identifier) @variable.parameter))
[
"protocol"
"extension"
"indirect"
"nonisolated"
"override"
"convenience"
"required"
"some"
"any"
"weak"
"unowned"
"didSet"
"willSet"
"subscript"
"let"
"var"
(throws)
(where_keyword)
(getter_specifier)
(setter_specifier)
(modify_specifier)
(else)
(as_operator)
] @keyword
[
"enum"
"struct"
"class"
"typealias"
] @keyword.type
[
"async"
"await"
] @keyword.coroutine
(shebang_line) @keyword.directive
(class_body
(property_declaration
(pattern
(simple_identifier) @variable.member)))
(protocol_property_declaration
(pattern
(simple_identifier) @variable.member))
(navigation_expression
(navigation_suffix
(simple_identifier) @variable.member))
(value_argument
name: (value_argument_label
(simple_identifier) @variable.member))
(import_declaration
"import" @keyword.import)
(enum_entry
"case" @keyword)
(modifiers
(attribute
"@" @attribute
(user_type
(type_identifier) @attribute)))
; Function calls
(call_expression
(simple_identifier) @function.call) ; foo()
(call_expression
; foo.bar.baz(): highlight the baz()
(navigation_expression
(navigation_suffix
(simple_identifier) @function.call)))
(call_expression
(prefix_expression
(simple_identifier) @function.call)) ; .foo()
((navigation_expression
(simple_identifier) @type) ; SomeType.method(): highlight SomeType as a type
(#match? @type "^[A-Z]"))
(directive) @keyword.directive
; See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure/#Keywords-and-Punctuation
[
(diagnostic)
(availability_condition)
(playground_literal)
(key_path_string_expression)
(selector_expression)
(external_macro_definition)
] @function.macro
(special_literal) @constant.macro
; Statements
(for_statement
"for" @keyword.repeat)
(for_statement
"in" @keyword.repeat)
[
"while"
"repeat"
"continue"
"break"
] @keyword.repeat
(guard_statement
"guard" @keyword.conditional)
(if_statement
"if" @keyword.conditional)
(switch_statement
"switch" @keyword.conditional)
(switch_entry
"case" @keyword)
(switch_entry
"fallthrough" @keyword)
(switch_entry
(default_keyword) @keyword)
"return" @keyword.return
(ternary_expression
[
"?"
":"
] @keyword.conditional.ternary)
[
(try_operator)
"do"
(throw_keyword)
(catch_keyword)
] @keyword.exception
(statement_label) @label
; Comments
[
(comment)
(multiline_comment)
] @comment @spell
((comment) @comment.documentation
(#match? @comment.documentation "^///[^/]"))
((comment) @comment.documentation
(#match? @comment.documentation "^///$"))
((multiline_comment) @comment.documentation
(#match? @comment.documentation "^/[*][*][^*].*[*]/$"))
; String literals
(line_str_text) @string
(str_escaped_char) @string.escape
(multi_line_str_text) @string
(raw_str_part) @string
(raw_str_end_part) @string
(line_string_literal
[
"\\("
")"
] @punctuation.special)
(multi_line_string_literal
[
"\\("
")"
] @punctuation.special)
(raw_str_interpolation
[
(raw_str_interpolation_start)
")"
] @punctuation.special)
[
"\""
"\"\"\""
] @string
; Lambda literals
(lambda_literal
"in" @keyword.operator)
; Basic literals
[
(integer_literal)
(hex_literal)
(oct_literal)
(bin_literal)
] @number
(real_literal) @number.float
(boolean_literal) @boolean
"nil" @constant.builtin
(wildcard_pattern) @character.special
; Regex literals
(regex_literal) @string.regexp
; Operators
(custom_operator) @operator
[
"+"
"-"
"*"
"/"
"%"
"="
"+="
"-="
"*="
"/="
"<"
">"
"<<"
">>"
"<="
">="
"++"
"--"
"^"
"&"
"&&"
"|"
"||"
"~"
"%="
"!="
"!=="
"=="
"==="
"?"
"??"
"->"
"..<"
"..."
(bang)
] @operator
(type_arguments
[
"<"
">"
] @punctuation.bracket)

View File

@@ -0,0 +1,123 @@
; format-ignore
[
; ... refers to the section that will get affected by this indent.begin capture
(protocol_body) ; protocol Foo { ... }
(class_body) ; class Foo { ... }
(enum_class_body) ; enum Foo { ... }
(function_declaration) ; func Foo (...) {...}
(init_declaration) ; init(...) {...}
(deinit_declaration) ; deinit {...}
(computed_property) ; { ... }
(subscript_declaration) ; subscript Foo(...) { ... }
(computed_getter) ; get { ... }
(computed_setter) ; set { ... }
(assignment) ; a = b
(control_transfer_statement) ; return ...
(for_statement)
(while_statement)
(repeat_while_statement)
(do_statement)
(if_statement)
(switch_statement)
(guard_statement)
(type_parameters) ; x<Foo>
(tuple_type) ; (...)
(array_type) ; [String]
(dictionary_type) ; [Foo: Bar]
(call_expression) ; callFunc(...)
(tuple_expression) ; ( foo + bar )
(array_literal) ; [ foo, bar ]
(dictionary_literal) ; [ foo: bar, x: y ]
(lambda_literal)
(willset_didset_block)
(willset_clause)
(didset_clause)
] @indent.begin
(init_declaration) @indent.begin
(init_declaration
[
"init"
"("
] @indent.branch)
; indentation for init parameters
(init_declaration
")" @indent.branch @indent.end)
(init_declaration
(parameter) @indent.begin
(#set! indent.immediate))
; @something(...)
(modifiers
(attribute) @indent.begin)
(function_declaration
(modifiers
.
(attribute)
(_)* @indent.branch)
.
_ @indent.branch
(#not-kind-eq? @indent.branch "type_parameters" "parameter"))
(ERROR
[
"<"
"{"
"("
"["
]) @indent.begin
; if-elseif
(if_statement
(if_statement) @indent.dedent)
; case Foo:
; default Foo:
; @attribute default Foo:
(switch_entry
.
_ @indent.branch)
(function_declaration
")" @indent.branch)
(type_parameters
">" @indent.branch @indent.end .)
(tuple_expression
")" @indent.branch @indent.end)
(value_arguments
")" @indent.branch @indent.end)
(tuple_type
")" @indent.branch @indent.end)
(modifiers
(attribute
")" @indent.branch @indent.end))
[
"}"
"]"
] @indent.branch @indent.end
[
; (ERROR)
(comment)
(multiline_comment)
(raw_str_part)
(multi_line_string_literal)
] @indent.auto
(directive) @indent.ignore

View File

@@ -0,0 +1,10 @@
; Parse regex syntax within regex literals
((regex_literal) @injection.content
  (#set! injection.language "regex"))

; Treat comment bodies as the "comment" language so editors can highlight
; conventions like TODO/FIXME and URLs inside them
([
  (comment)
  (multiline_comment)
] @injection.content
  (#set! injection.language "comment"))

View File

@@ -0,0 +1,23 @@
; Definitions: imported module names and function names introduce bindings
(import_declaration
  (identifier) @local.definition.import)

(function_declaration
  name: (simple_identifier) @local.definition.function)

; Scopes
[
  (statements)
  (for_statement)
  (while_statement)
  (repeat_while_statement)
  (do_statement)
  (if_statement)
  (guard_statement)
  (switch_statement)
  (property_declaration)
  (function_declaration)
  (class_declaration)
  (protocol_declaration)
] @local.scope

View File

@@ -0,0 +1,66 @@
(protocol_declaration
declaration_kind: "protocol" @name
.
_ * @name
.
body: (protocol_body)
) @item
(class_declaration
declaration_kind: (
[
"actor"
"class"
"extension"
"enum"
"struct"
]
) @name
.
_ * @name
.
body: (_)
) @item
(init_declaration
name: "init" @name
.
_ * @name
.
body: (function_body)
) @item
(deinit_declaration
"deinit" @name) @item
(function_declaration
"func" @name
.
_ * @name
.
body: (function_body)
) @item
(class_body
(property_declaration
(value_binding_pattern) @name
name: (pattern) @name
(type_annotation)? @name
) @item
)
(enum_class_body
(property_declaration
(value_binding_pattern) @name
name: (pattern) @name
(type_annotation)? @name
) @item
)
(
(protocol_function_declaration) @name
) @item
(
(protocol_property_declaration) @name
) @item

View File

@@ -0,0 +1,51 @@
(class_declaration
name: (type_identifier) @name) @definition.class
(protocol_declaration
name: (type_identifier) @name) @definition.interface
(class_declaration
(class_body
[
(function_declaration
name: (simple_identifier) @name
)
(subscript_declaration
(parameter (simple_identifier) @name)
)
(init_declaration "init" @name)
(deinit_declaration "deinit" @name)
]
)
) @definition.method
(protocol_declaration
(protocol_body
[
(protocol_function_declaration
name: (simple_identifier) @name
)
(subscript_declaration
(parameter (simple_identifier) @name)
)
(init_declaration "init" @name)
]
)
) @definition.method
(class_declaration
(class_body
[
(property_declaration
(pattern (simple_identifier) @name)
)
]
)
) @definition.property
(property_declaration
(pattern (simple_identifier) @name)
) @definition.property
(function_declaration
name: (simple_identifier) @name) @definition.function

View File

@@ -0,0 +1,19 @@
; MARK: Structure
(function_declaration
body: (_) @function.inside) @function.around
; TODO: Classes/structs/enums
; MARK: Tests
; Only matches prefix test. Other conventions
; might be nice to add!
(function_declaration
name: (simple_identifier) @_name
(#match? @_name "^test")
)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,929 @@
#include "tree_sitter/parser.h"
#include <string.h>
#include <wctype.h>
#define TOKEN_COUNT 33
enum TokenType {
BLOCK_COMMENT,
RAW_STR_PART,
RAW_STR_CONTINUING_INDICATOR,
RAW_STR_END_PART,
IMPLICIT_SEMI,
EXPLICIT_SEMI,
ARROW_OPERATOR,
DOT_OPERATOR,
CONJUNCTION_OPERATOR,
DISJUNCTION_OPERATOR,
NIL_COALESCING_OPERATOR,
EQUAL_SIGN,
EQ_EQ,
PLUS_THEN_WS,
MINUS_THEN_WS,
BANG,
THROWS_KEYWORD,
RETHROWS_KEYWORD,
DEFAULT_KEYWORD,
WHERE_KEYWORD,
ELSE_KEYWORD,
CATCH_KEYWORD,
AS_KEYWORD,
AS_QUEST,
AS_BANG,
ASYNC_KEYWORD,
CUSTOM_OPERATOR,
HASH_SYMBOL,
DIRECTIVE_IF,
DIRECTIVE_ELSEIF,
DIRECTIVE_ELSE,
DIRECTIVE_ENDIF,
FAKE_TRY_BANG
};
#define OPERATOR_COUNT 20
const char* OPERATORS[OPERATOR_COUNT] = {
"->",
".",
"&&",
"||",
"??",
"=",
"==",
"+",
"-",
"!",
"throws",
"rethrows",
"default",
"where",
"else",
"catch",
"as",
"as?",
"as!",
"async"
};
enum IllegalTerminatorGroup {
ALPHANUMERIC,
OPERATOR_SYMBOLS,
OPERATOR_OR_DOT,
NON_WHITESPACE
};
const enum IllegalTerminatorGroup OP_ILLEGAL_TERMINATORS[OPERATOR_COUNT] = {
OPERATOR_SYMBOLS, // ->
OPERATOR_OR_DOT, // .
OPERATOR_SYMBOLS, // &&
OPERATOR_SYMBOLS, // ||
OPERATOR_SYMBOLS, // ??
OPERATOR_SYMBOLS, // =
OPERATOR_SYMBOLS, // ==
NON_WHITESPACE, // +
NON_WHITESPACE, // -
OPERATOR_SYMBOLS, // !
ALPHANUMERIC, // throws
ALPHANUMERIC, // rethrows
ALPHANUMERIC, // default
ALPHANUMERIC, // where
ALPHANUMERIC, // else
ALPHANUMERIC, // catch
ALPHANUMERIC, // as
OPERATOR_SYMBOLS, // as?
OPERATOR_SYMBOLS, // as!
ALPHANUMERIC // async
};
const enum TokenType OP_SYMBOLS[OPERATOR_COUNT] = {
ARROW_OPERATOR,
DOT_OPERATOR,
CONJUNCTION_OPERATOR,
DISJUNCTION_OPERATOR,
NIL_COALESCING_OPERATOR,
EQUAL_SIGN,
EQ_EQ,
PLUS_THEN_WS,
MINUS_THEN_WS,
BANG,
THROWS_KEYWORD,
RETHROWS_KEYWORD,
DEFAULT_KEYWORD,
WHERE_KEYWORD,
ELSE_KEYWORD,
CATCH_KEYWORD,
AS_KEYWORD,
AS_QUEST,
AS_BANG,
ASYNC_KEYWORD
};
const uint64_t OP_SYMBOL_SUPPRESSOR[OPERATOR_COUNT] = {
0, // ARROW_OPERATOR,
0, // DOT_OPERATOR,
0, // CONJUNCTION_OPERATOR,
0, // DISJUNCTION_OPERATOR,
0, // NIL_COALESCING_OPERATOR,
0, // EQUAL_SIGN,
0, // EQ_EQ,
0, // PLUS_THEN_WS,
0, // MINUS_THEN_WS,
1UL << FAKE_TRY_BANG, // BANG,
0, // THROWS_KEYWORD,
0, // RETHROWS_KEYWORD,
0, // DEFAULT_KEYWORD,
0, // WHERE_KEYWORD,
0, // ELSE_KEYWORD,
0, // CATCH_KEYWORD,
0, // AS_KEYWORD,
0, // AS_QUEST,
0, // AS_BANG,
0, // ASYNC_KEYWORD
};
#define RESERVED_OP_COUNT 31
const char* RESERVED_OPS[RESERVED_OP_COUNT] = {
"/",
"=",
"-",
"+",
"!",
"*",
"%",
"<",
">",
"&",
"|",
"^",
"?",
"~",
".",
"..",
"->",
"/*",
"*/",
"+=",
"-=",
"*=",
"/=",
"%=",
">>",
"<<",
"++",
"--",
"===",
"...",
"..<"
};
/*
 * Report whether `op` is a token that may continue an expression across an
 * implicit semicolon (newline). BANG is explicitly excluded: a postfix `!`
 * binds to the preceding expression and must not reach across a line break.
 */
static bool is_cross_semi_token(enum TokenType op) {
  return op == ARROW_OPERATOR
      || op == DOT_OPERATOR
      || op == CONJUNCTION_OPERATOR
      || op == DISJUNCTION_OPERATOR
      || op == NIL_COALESCING_OPERATOR
      || op == EQUAL_SIGN
      || op == EQ_EQ
      || op == PLUS_THEN_WS
      || op == MINUS_THEN_WS
      || op == THROWS_KEYWORD
      || op == RETHROWS_KEYWORD
      || op == DEFAULT_KEYWORD
      || op == WHERE_KEYWORD
      || op == ELSE_KEYWORD
      || op == CATCH_KEYWORD
      || op == AS_KEYWORD
      || op == AS_QUEST
      || op == AS_BANG
      || op == ASYNC_KEYWORD
      || op == CUSTOM_OPERATOR;
}
#define NON_CONSUMING_CROSS_SEMI_CHAR_COUNT 3
const uint32_t NON_CONSUMING_CROSS_SEMI_CHARS[NON_CONSUMING_CROSS_SEMI_CHAR_COUNT] = { '?', ':', '{' };
/**
* All possible results of having performed some sort of parsing.
*
* A parser can return a result along two dimensions:
* 1. Should the scanner continue trying to find another result?
* 2. Was some result produced by this parsing attempt?
*
* These are flattened into a single enum together. When the function returns one of the `TOKEN_FOUND` cases, it
* will always populate its `symbol_result` field. When it returns one of the `STOP_PARSING` cases, callers should
* immediately return (with the value, if there is one).
*/
enum ParseDirective {
CONTINUE_PARSING_NOTHING_FOUND,
CONTINUE_PARSING_TOKEN_FOUND,
CONTINUE_PARSING_SLASH_CONSUMED,
STOP_PARSING_NOTHING_FOUND,
STOP_PARSING_TOKEN_FOUND,
STOP_PARSING_END_OF_FILE
};
/*
 * Persistent scanner state: the number of '#' characters that opened the raw
 * string literal currently being scanned (0 when not inside a raw string).
 */
struct ScannerState {
  uint32_t ongoing_raw_str_hash_count;
};

/* Allocate a zero-initialized scanner state. */
void *tree_sitter_swift_external_scanner_create() {
  return calloc(1, sizeof(struct ScannerState));
}

/* Release the state allocated by `..._create`. */
void tree_sitter_swift_external_scanner_destroy(void *payload) {
  free(payload);
}

/* Return the scanner to its initial (not-inside-a-raw-string) state. */
void tree_sitter_swift_external_scanner_reset(void *payload) {
  struct ScannerState *state = (struct ScannerState *)payload;
  state->ongoing_raw_str_hash_count = 0;
}

/*
 * Write the state into `buffer` as 4 big-endian bytes; returns the number of
 * bytes written.
 */
unsigned tree_sitter_swift_external_scanner_serialize(void *payload, char *buffer) {
  struct ScannerState *state = (struct ScannerState *)payload;
  uint32_t hash_count = state->ongoing_raw_str_hash_count;
  buffer[0] = (char)((hash_count >> 24) & 0xff);
  buffer[1] = (char)((hash_count >> 16) & 0xff);
  buffer[2] = (char)((hash_count >> 8) & 0xff);
  buffer[3] = (char)(hash_count & 0xff);
  return 4;
}

/*
 * Restore the state previously written by `..._serialize`.
 *
 * Each byte is widened through uint8_t before shifting: `char` may be signed,
 * and casting a byte >= 0x80 directly to uint32_t would sign-extend it and
 * corrupt the reassembled count.
 *
 * tree-sitter calls deserialize with length 0 to mean "no saved state", so a
 * short buffer resets the state rather than leaving stale data behind.
 */
void tree_sitter_swift_external_scanner_deserialize(
  void *payload,
  const char *buffer,
  unsigned length
) {
  struct ScannerState *state = (struct ScannerState *)payload;
  if (length < 4) {
    state->ongoing_raw_str_hash_count = 0;
    return;
  }
  uint32_t hash_count = (
    (((uint32_t)(uint8_t)buffer[0]) << 24) |
    (((uint32_t)(uint8_t)buffer[1]) << 16) |
    (((uint32_t)(uint8_t)buffer[2]) << 8) |
    ((uint32_t)(uint8_t)buffer[3])
  );
  state->ongoing_raw_str_hash_count = hash_count;
}
/* Consume the current lookahead character as part of the token (skip = false). */
static void advance(TSLexer *lexer) {
  lexer->advance(lexer, false);
}
/*
 * Whitespace test used by the scanner: a ';' terminates a statement just like
 * whitespace does, so it is folded into the same predicate.
 */
static bool should_treat_as_wspace(int32_t character) {
  if (character == (int32_t) ';') {
    return true;
  }
  return iswspace(character) != 0;
}
/* Tally how many of the fixed operator slots were flagged during the scan. */
static int32_t encountered_op_count(bool *encountered_operator) {
  int32_t total = 0;
  for (int i = 0; i < OPERATOR_COUNT; i++) {
    total += encountered_operator[i] ? 1 : 0;
  }
  return total;
}
/* True iff any reserved-operator slot holds the "fully matched" marker (2). */
static bool any_reserved_ops(uint8_t *encountered_reserved_ops) {
  bool found = false;
  for (int i = 0; i < RESERVED_OP_COUNT && !found; i++) {
    found = (encountered_reserved_ops[i] == 2);
  }
  return found;
}
/*
 * Decide whether `cur_char` is allowed at position `char_idx` of a custom
 * operator whose first character was `first_char`.
 *
 * The accepted character sets appear to mirror the `operator-head` /
 * `operator-character` productions of Swift's lexical grammar ("Lexical
 * Structure" in the Swift book) — ASCII operator symbols are valid anywhere,
 * a set of Unicode symbol ranges is valid anywhere, and combining-mark
 * ranges are valid only after the first character. TODO(review): confirm the
 * ranges against the current Swift specification.
 */
static bool is_legal_custom_operator(
  int32_t char_idx,
  int32_t first_char,
  int32_t cur_char
) {
  bool is_first_char = !char_idx;
  switch (cur_char) {
    /* ASCII operator symbols, legal at any position. */
    case '=':
    case '-':
    case '+':
    case '!':
    case '%':
    case '<':
    case '>':
    case '&':
    case '|':
    case '^':
    case '?':
    case '~':
      return true;
    case '.':
      // Grammar allows `.` for any operator that starts with `.`
      return is_first_char || first_char == '.';
    case '*':
    case '/':
      // Not listed in the grammar, but `/*` and `//` can't be the start of an operator since they start comments
      return char_idx != 1 || first_char != '/';
    default:
      /* Unicode symbol/punctuation ranges: legal at any position. */
      if (
        (cur_char >= 0x00A1 && cur_char <= 0x00A7) ||
        (cur_char == 0x00A9) ||
        (cur_char == 0x00AB) ||
        (cur_char == 0x00AC) ||
        (cur_char == 0x00AE) ||
        (cur_char >= 0x00B0 && cur_char <= 0x00B1) ||
        (cur_char == 0x00B6) ||
        (cur_char == 0x00BB) ||
        (cur_char == 0x00BF) ||
        (cur_char == 0x00D7) ||
        (cur_char == 0x00F7) ||
        (cur_char >= 0x2016 && cur_char <= 0x2017) ||
        (cur_char >= 0x2020 && cur_char <= 0x2027) ||
        (cur_char >= 0x2030 && cur_char <= 0x203E) ||
        (cur_char >= 0x2041 && cur_char <= 0x2053) ||
        (cur_char >= 0x2055 && cur_char <= 0x205E) ||
        (cur_char >= 0x2190 && cur_char <= 0x23FF) ||
        (cur_char >= 0x2500 && cur_char <= 0x2775) ||
        (cur_char >= 0x2794 && cur_char <= 0x2BFF) ||
        (cur_char >= 0x2E00 && cur_char <= 0x2E7F) ||
        (cur_char >= 0x3001 && cur_char <= 0x3003) ||
        (cur_char >= 0x3008 && cur_char <= 0x3020) ||
        (cur_char == 0x3030)
      ) {
        return true;
      } else if (
        /* Combining-mark ranges: legal only after the first character. */
        (cur_char >= 0x0300 && cur_char <= 0x036f) ||
        (cur_char >= 0x1DC0 && cur_char <= 0x1DFF) ||
        (cur_char >= 0x20D0 && cur_char <= 0x20FF) ||
        (cur_char >= 0xFE00 && cur_char <= 0xFE0F) ||
        (cur_char >= 0xFE20 && cur_char <= 0xFE2F) ||
        (cur_char >= 0xE0100 && cur_char <= 0xE01EF)
      ) {
        return !is_first_char;
      } else {
        return false;
      }
  }
}
// Scan the longest operator token at the current lexer position.
//
// Three matchers advance in lockstep, one character at a time:
//   - possible_operators[i]: can OPERATORS[i] still match? (seeded from
//     valid_symbols, so only operators the parser can accept are considered)
//   - reserved_operators[i]: can RESERVED_OPS[i] still match? A custom
//     operator that spells exactly a reserved operator must not be emitted.
//   - possible_custom_operator: is the text so far still a legal Swift
//     custom operator?
//
// `prior_char` is a character the caller already consumed ('\0' for none);
// when set, matching starts at string index 1 and the first character is
// taken from it instead of the lexer. `mark_end` controls whether the token
// end is committed via lexer->mark_end. On success the matched token is
// written to *symbol_result and true is returned.
static bool eat_operators(
    TSLexer *lexer,
    const bool *valid_symbols,
    bool mark_end,
    const int32_t prior_char,
    enum TokenType *symbol_result
) {
    bool possible_operators[OPERATOR_COUNT];
    uint8_t reserved_operators[RESERVED_OP_COUNT];
    // An operator starts out possible if its token is valid here and its
    // first character agrees with any already-consumed prior_char.
    for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
        possible_operators[op_idx] = valid_symbols[OP_SYMBOLS[op_idx]] && (!prior_char || OPERATORS[op_idx][0] == prior_char);
    }
    // NOTE(review): entries hold 0 (ruled out), 1 (still matching), or 2 (set
    // below when the reserved op completes on the current char). The exact
    // meaning of 1 vs 2 depends on any_reserved_ops, defined elsewhere in
    // this file -- confirm there.
    for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
        reserved_operators[op_idx] = !prior_char || RESERVED_OPS[op_idx][0] == prior_char;
    }
    bool possible_custom_operator = valid_symbols[CUSTOM_OPERATOR];
    int32_t first_char = prior_char ? prior_char : lexer->lookahead;
    int32_t last_examined_char = first_char;
    int32_t str_idx = prior_char ? 1 : 0;
    // Index into OPERATORS of the longest fully-matched operator so far.
    int32_t full_match = -1;
    while(true) {
        for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
            if (!possible_operators[op_idx]) {
                continue;
            }
            if (OPERATORS[op_idx][str_idx] == '\0') {
                // The whole operator matched. Make sure that the operator is
                // allowed to have the next character as its lookahead.
                enum IllegalTerminatorGroup illegal_terminators = OP_ILLEGAL_TERMINATORS[op_idx];
                switch (lexer->lookahead) {
                    // See "Operators":
                    // https://docs.swift.org/swift-book/ReferenceManual/LexicalStructure.html#ID418
                    case '/':
                    case '=':
                    case '-':
                    case '+':
                    case '!':
                    case '*':
                    case '%':
                    case '<':
                    case '>':
                    case '&':
                    case '|':
                    case '^':
                    case '?':
                    case '~':
                        if (illegal_terminators == OPERATOR_SYMBOLS) {
                            break;
                        } // Otherwise, intentionally fall through to the OPERATOR_OR_DOT case
                        // fall through
                    case '.':
                        if (illegal_terminators == OPERATOR_OR_DOT) {
                            break;
                        } // Otherwise, fall through to DEFAULT which checks its groups directly
                        // fall through
                    default:
                        if (iswalnum(lexer->lookahead) && illegal_terminators == ALPHANUMERIC) {
                            break;
                        }
                        if (!iswspace(lexer->lookahead) && illegal_terminators == NON_WHITESPACE) {
                            break;
                        }
                        // The lookahead is acceptable: record this as the best
                        // complete match so far (a longer one may overwrite it).
                        full_match = op_idx;
                        if (mark_end) {
                            lexer->mark_end(lexer);
                        }
                }
                possible_operators[op_idx] = false;
                continue;
            }
            if (OPERATORS[op_idx][str_idx] != lexer->lookahead) {
                possible_operators[op_idx] = false;
                continue;
            }
        }
        for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
            if (!reserved_operators[op_idx]) {
                continue;
            }
            if (RESERVED_OPS[op_idx][str_idx] == '\0') {
                // The text has extended past this reserved op, so it no
                // longer matches exactly.
                reserved_operators[op_idx] = 0;
                continue;
            }
            if (RESERVED_OPS[op_idx][str_idx] != lexer->lookahead) {
                reserved_operators[op_idx] = 0;
                continue;
            }
            if (RESERVED_OPS[op_idx][str_idx + 1] == '\0') {
                // The current character completes the reserved operator.
                reserved_operators[op_idx] = 2;
                continue;
            }
        }
        possible_custom_operator = possible_custom_operator && is_legal_custom_operator(
            str_idx,
            first_char,
            lexer->lookahead
        );
        uint32_t encountered_ops = encountered_op_count(possible_operators);
        if (encountered_ops == 0) {
            if (!possible_custom_operator) {
                // Nothing can match anymore; stop before consuming more input.
                break;
            } else if (mark_end && full_match == -1) {
                // Only a custom operator remains viable; keep extending the
                // token end for as long as it stays legal.
                lexer->mark_end(lexer);
            }
        }
        last_examined_char = lexer->lookahead;
        lexer->advance(lexer, false);
        str_idx += 1;
        if (encountered_ops == 0 && !is_legal_custom_operator(
            str_idx,
            first_char,
            lexer->lookahead
        )) {
            break;
        }
    }
    if (full_match != -1) {
        // We have a match -- first see if that match has a symbol that suppresses it. For example, in `try!`, we do not
        // want to emit the `!` as a symbol in our scanner, because we want the parser to have the chance to parse it as
        // an immediate token.
        // OP_SYMBOL_SUPPRESSOR[full_match] is a bitmask indexed by token id.
        uint64_t suppressing_symbols = OP_SYMBOL_SUPPRESSOR[full_match];
        if (suppressing_symbols) {
            for (uint64_t suppressor = 0; suppressor < TOKEN_COUNT; suppressor++) {
                if (!(suppressing_symbols & 1ULL << suppressor)) {
                    continue;
                }
                // The suppressing symbol is valid in this position, so skip it.
                if (valid_symbols[suppressor]) {
                    return false;
                }
            }
        }
        *symbol_result = OP_SYMBOLS[full_match];
        return true;
    }
    if (possible_custom_operator && !any_reserved_ops(reserved_operators)) {
        // NOTE(review): a trailing `<` is left unconsumed unless followed by
        // whitespace -- presumably so it can instead open a generic-argument
        // list; confirm against grammar.js before relying on this.
        if ((last_examined_char != '<' || iswspace(lexer->lookahead)) && mark_end) {
            lexer->mark_end(lexer);
        }
        *symbol_result = CUSTOM_OPERATOR;
        return true;
    }
    return false;
}
// Attempt to consume a (possibly nested) `/* ... */` block comment.
//
// Returns:
//   CONTINUE_PARSING_NOTHING_FOUND  - lookahead was not `/`; nothing consumed.
//   CONTINUE_PARSING_SLASH_CONSUMED - a lone `/` was consumed but no `*`
//                                     followed, so this is not a block comment.
//   STOP_PARSING_END_OF_FILE        - the comment was never terminated.
//   STOP_PARSING_TOKEN_FOUND        - a full comment was consumed and
//                                     *symbol_result is set to BLOCK_COMMENT.
static enum ParseDirective eat_comment(
    TSLexer *lexer,
    const bool *valid_symbols,
    bool mark_end,
    enum TokenType *symbol_result
) {
    if (lexer->lookahead != '/') {
        return CONTINUE_PARSING_NOTHING_FOUND;
    }
    advance(lexer);
    if (lexer->lookahead != '*') {
        return CONTINUE_PARSING_SLASH_CONSUMED;
    }
    advance(lexer);
    unsigned depth = 1;          // Swift block comments nest.
    bool prev_was_star = false;  // True when the previous character was `*`.
    while (true) {
        int32_t cur = lexer->lookahead;
        if (cur == '\0') {
            // Hit end of input while still inside the comment.
            return STOP_PARSING_END_OF_FILE;
        }
        advance(lexer);
        if (cur == '*') {
            prev_was_star = true;
            continue;
        }
        if (cur == '/' && prev_was_star) {
            // `*/` closes one nesting level.
            prev_was_star = false;
            if (--depth == 0) {
                if (mark_end) {
                    lexer->mark_end(lexer);
                }
                *symbol_result = BLOCK_COMMENT;
                return STOP_PARSING_TOKEN_FOUND;
            }
            continue;
        }
        if (cur == '/' && lexer->lookahead == '*') {
            // `/*` opens a nested comment; consume the `*` immediately so it
            // cannot also pair with a following `/` as a closer.
            depth += 1;
            advance(lexer);
        }
        prev_was_star = false;
    }
}
// Consume whitespace (and interleaved comments) at the current position and
// decide whether it amounts to a statement terminator.
//
// Returns:
//   STOP_PARSING_TOKEN_FOUND        - a semicolon token was produced;
//                                     *symbol_result is EXPLICIT_SEMI (a real
//                                     `;`) or IMPLICIT_SEMI (a meaningful
//                                     newline), or a block comment token when
//                                     one was found immediately after the
//                                     newline.
//   CONTINUE_PARSING_TOKEN_FOUND    - a newline was crossed and may still
//                                     become an IMPLICIT_SEMI; *symbol_result
//                                     is set but the caller keeps scanning.
//   CONTINUE_PARSING_NOTHING_FOUND  - only insignificant whitespace was seen.
//   CONTINUE_PARSING_SLASH_CONSUMED - a stray `/` was consumed along the way.
//   STOP_PARSING_NOTHING_FOUND /
//   STOP_PARSING_END_OF_FILE        - scanning should end with no token.
static enum ParseDirective eat_whitespace(
    TSLexer *lexer,
    const bool *valid_symbols,
    enum TokenType *symbol_result
) {
    enum ParseDirective ws_directive = CONTINUE_PARSING_NOTHING_FOUND;
    // NOTE(review): requires BOTH semicolon tokens to be valid before either
    // may be emitted -- presumably the grammar always makes them valid
    // together; confirm in grammar.js.
    bool semi_is_valid = valid_symbols[IMPLICIT_SEMI] && valid_symbols[EXPLICIT_SEMI];
    uint32_t lookahead;
    // Walk over everything the grammar treats as whitespace. `lookahead`
    // always holds the character most recently examined by the condition.
    while (should_treat_as_wspace(lookahead = lexer->lookahead)) {
        if (lookahead == ';') {
            if (semi_is_valid) {
                // An explicit `;` is itself the token; consume it and stop.
                ws_directive = STOP_PARSING_TOKEN_FOUND;
                lexer->advance(lexer, false);
            }
            break;
        }
        lexer->advance(lexer, true);
        lexer->mark_end(lexer);
        // The first newline crossed makes this run of whitespace a candidate
        // for an implicit semicolon.
        if (ws_directive == CONTINUE_PARSING_NOTHING_FOUND && (lookahead == '\n' || lookahead == '\r')) {
            ws_directive = CONTINUE_PARSING_TOKEN_FOUND;
        }
    }
    enum ParseDirective any_comment = CONTINUE_PARSING_NOTHING_FOUND;
    // A newline followed by `/` needs a closer look: comments and leading
    // operators on the next line can suppress the implicit semicolon.
    if (ws_directive == CONTINUE_PARSING_TOKEN_FOUND && lookahead == '/') {
        bool has_seen_single_comment = false;
        while (lexer->lookahead == '/') {
            // It's possible that this is a comment - start an exploratory mission to find out, and if it is, look for what
            // comes after it. We care about what comes after it for the purpose of suppressing the newline.
            enum TokenType multiline_comment_result;
            any_comment = eat_comment(lexer, valid_symbols, /* mark_end */ false, &multiline_comment_result);
            if (any_comment == STOP_PARSING_TOKEN_FOUND) {
                // This is a multiline comment. This scanner should be parsing those, so we might want to bail out and
                // emit it instead. However, we only want to do that if we haven't advanced through a _single_ line
                // comment on the way - otherwise that will get lumped into this.
                if (!has_seen_single_comment) {
                    lexer->mark_end(lexer);
                    *symbol_result = multiline_comment_result;
                    return STOP_PARSING_TOKEN_FOUND;
                }
            } else if (any_comment == STOP_PARSING_END_OF_FILE) {
                return STOP_PARSING_END_OF_FILE;
            } else if (any_comment == CONTINUE_PARSING_SLASH_CONSUMED) {
                // We accidentally ate a slash -- we should actually bail out, say we saw nothing, and let the next pass
                // take it from after the newline.
                return CONTINUE_PARSING_SLASH_CONSUMED;
            } else if (lexer->lookahead == '/') {
                // There wasn't a multiline comment, which we know means that the comment parser ate its `/` and then
                // bailed out. If it had seen anything comment-like after that first `/` it would have continued going
                // and eventually had a well-formed comment or an EOF. Thus, if we're currently looking at a `/`, it's
                // the second one of those and it means we have a single-line comment.
                has_seen_single_comment = true;
                while (lexer->lookahead != '\n' && lexer->lookahead != '\0') {
                    lexer->advance(lexer, true);
                }
            } else if (iswspace(lexer->lookahead)) {
                // We didn't see any type of comment - in fact, we saw an operator that we don't normally treat as an
                // operator. Still, this is a reason to stop parsing.
                return STOP_PARSING_NOTHING_FOUND;
            }
            // If we skipped through some comment, we're at whitespace now, so advance.
            while(iswspace(lexer->lookahead)) {
                any_comment = CONTINUE_PARSING_NOTHING_FOUND; // We're advancing, so clear out the comment
                lexer->advance(lexer, true);
            }
        }
        // After the comments, check whether the next line begins with an
        // operator: a leading operator continues the previous expression and
        // therefore suppresses the implicit semicolon.
        enum TokenType operator_result;
        bool saw_operator = eat_operators(
            lexer,
            valid_symbols,
            /* mark_end */ false,
            '\0',
            &operator_result
        );
        if (saw_operator) {
            // The operator we saw should suppress the newline, so bail out.
            return STOP_PARSING_NOTHING_FOUND;
        } else {
            // Promote the implicit newline to an explicit one so we don't check for operators again.
            *symbol_result = IMPLICIT_SEMI;
            ws_directive = STOP_PARSING_TOKEN_FOUND;
        }
    }
    // Let's consume operators that can live after a "semicolon" style newline. Before we do that, though, we want to
    // check for a set of characters that we do not consume, but that still suppress the semi.
    if (ws_directive == CONTINUE_PARSING_TOKEN_FOUND) {
        // NOTE(review): NON_CONSUMING_CROSS_SEMI_CHARS is defined elsewhere in
        // this file; each listed character cancels the implicit semicolon
        // without being consumed here.
        for (int i = 0; i < NON_CONSUMING_CROSS_SEMI_CHAR_COUNT; i++) {
            if (NON_CONSUMING_CROSS_SEMI_CHARS[i] == lookahead) {
                return CONTINUE_PARSING_NOTHING_FOUND;
            }
        }
    }
    if (semi_is_valid && ws_directive != CONTINUE_PARSING_NOTHING_FOUND) {
        *symbol_result = lookahead == ';' ? EXPLICIT_SEMI : IMPLICIT_SEMI;
        return ws_directive;
    }
    return CONTINUE_PARSING_NOTHING_FOUND;
}
#define DIRECTIVE_COUNT 4

// Compiler-control keywords that can follow `#` (as in `#if` / `#endif`).
// Fixed: this array was declared with OPERATOR_COUNT entries -- a copy/paste
// slip from the operator tables above. It holds exactly DIRECTIVE_COUNT
// strings, so size it with the matching constant; loops over it already use
// DIRECTIVE_COUNT, so nothing else changes.
const char* DIRECTIVES[DIRECTIVE_COUNT] = {
    "if",
    "elseif",
    "else",
    "endif"
};

// Token to emit for each DIRECTIVES entry, index-aligned with the above.
const enum TokenType DIRECTIVE_SYMBOLS[DIRECTIVE_COUNT] = {
    DIRECTIVE_IF,
    DIRECTIVE_ELSEIF,
    DIRECTIVE_ELSE,
    DIRECTIVE_ENDIF
};
// Read a compiler-control directive keyword (`if`, `elseif`, `else`,
// `endif`) at the current position; the caller has already consumed the
// introducing `#`.
//
// Longest match wins (`elseif` beats `else`); whenever a directive matches
// completely, the token end is committed at the end of that keyword. When
// nothing matches, the token is just the bare `#` (HASH_SYMBOL) and the
// token end stays wherever the caller marked it.
static enum TokenType find_possible_compiler_directive(TSLexer *lexer) {
    bool possible_directives[DIRECTIVE_COUNT];
    for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx++) {
        possible_directives[dir_idx] = true;
    }
    int32_t str_idx = 0;
    int32_t full_match = -1;
    while (true) {
        uint8_t match_count = 0;
        for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx++) {
            if (!possible_directives[dir_idx]) {
                continue;
            }
            uint8_t expected_char = DIRECTIVES[dir_idx][str_idx];
            if (expected_char == '\0') {
                // Complete keyword: remember it (a longer directive may still
                // overwrite this later) and commit the token end here.
                full_match = dir_idx;
                lexer->mark_end(lexer);
                // Fixed: retire a finished keyword immediately. Previously,
                // when the lookahead was '\0' (end of file right after e.g.
                // `#if`), `expected_char != lookahead` was false, the entry
                // stayed alive, and the next iteration indexed past the
                // string literal's terminator -- an out-of-bounds read and a
                // loop that never terminated. For any other lookahead the
                // entry was eliminated on this same pass anyway, so behavior
                // is otherwise unchanged.
                possible_directives[dir_idx] = false;
                continue;
            }
            if (expected_char != lexer->lookahead) {
                possible_directives[dir_idx] = false;
                continue;
            }
            // Still matching after this character.
            match_count += 1;
        }
        if (match_count == 0) {
            break;
        }
        lexer->advance(lexer, false);
        str_idx += 1;
    }
    if (full_match == -1) {
        // No compiler directive found, so just match the starting symbol
        return HASH_SYMBOL;
    }
    return DIRECTIVE_SYMBOLS[full_match];
}
// Scan one piece of a raw string literal (`#"..."#`, `##"..."##`, ...).
//
// Two ways in:
//   - state->ongoing_raw_str_hash_count == 0: we may be at the *start* of a
//     raw string -- count the leading `#`s and require an opening `"`. A
//     single `#` not followed by `"` is instead resolved as a compiler
//     directive or a bare `#` token.
//   - a raw string is already open and RAW_STR_CONTINUING_INDICATOR says the
//     grammar just closed an interpolation, so we resume inside the string.
//
// Emits RAW_STR_PART (stopping just before an interpolation `\#...(`), or
// RAW_STR_END_PART (consuming through the closing `"#...#`), and records the
// delimiter's hash count in `state` so the next call knows whether a raw
// string is still open.
static bool eat_raw_str_part(
    struct ScannerState *state,
    TSLexer *lexer,
    const bool *valid_symbols,
    enum TokenType *symbol_result
) {
    uint32_t hash_count = state->ongoing_raw_str_hash_count;
    if (!valid_symbols[RAW_STR_PART]) {
        return false;
    } else if (hash_count == 0) {
        // If this is a raw_str_part, it's the first one - look for hashes
        while (lexer->lookahead == '#') {
            hash_count += 1;
            advance(lexer);
        }
        if (hash_count == 0) {
            return false;
        }
        if (lexer->lookahead == '"') {
            advance(lexer);
        } else if (hash_count == 1) {
            // A lone `#` with no string after it: it may introduce a
            // compiler directive such as `#if`.
            lexer->mark_end(lexer);
            *symbol_result = find_possible_compiler_directive(lexer);
            return true;
        } else {
            return false;
        }
    } else if (valid_symbols[RAW_STR_CONTINUING_INDICATOR]) {
        // This is the end of an interpolation - now it's another raw_str_part. This is a synthetic
        // marker to tell us that the grammar just consumed a `(` symbol to close a raw
        // interpolation (since we don't want to fire on every `(` in existence). We don't have
        // anything to do except continue.
    } else {
        return false;
    }
    // We're in a state where anything other than `hash_count` hash symbols in a row should be eaten
    // and is part of a string.
    // The last character _before_ the hashes will tell us what happens next.
    // Matters are also complicated by the fact that we don't want to consume every character we
    // visit; if we see a `\#(`, for instance, with the appropriate number of hash symbols, we want
    // to end our parsing _before_ that sequence. This allows highlighting tools to treat that as a
    // separate token.
    while (lexer->lookahead != '\0') {
        // NOTE(review): uint8_t truncates multi-byte code points; that looks
        // safe because last_char is only ever compared against ASCII `\` and
        // `"` below -- confirm no multi-byte char can truncate to those.
        uint8_t last_char = '\0';
        lexer->mark_end(lexer); // We always want to parse thru the start of the string so far
        // Advance through anything that isn't a hash symbol, because we want to count those.
        while (lexer->lookahead != '#' && lexer->lookahead != '\0') {
            last_char = lexer->lookahead;
            advance(lexer);
            if (last_char != '\\' || lexer->lookahead == '\\') {
                // Mark a new end, but only if we didn't just advance past a `\` symbol, since we
                // don't want to consume that. Exception: if this is a `\` that happens _right
                // after_ another `\`, we for some reason _do_ want to consume that, because
                // apparently that is parsed as a literal `\` followed by something escaped.
                lexer->mark_end(lexer);
            }
        }
        // We hit at least one hash - count them and see if they match.
        uint32_t current_hash_count = 0;
        while (lexer->lookahead == '#' && current_hash_count < hash_count) {
            current_hash_count += 1;
            advance(lexer);
        }
        // If we saw exactly the right number of hashes, one of three things is true:
        // 1. We're trying to interpolate into this string.
        // 2. The string just ended.
        // 3. This was just some hash characters doing nothing important.
        if (current_hash_count == hash_count) {
            if (last_char == '\\' && lexer->lookahead == '(') {
                // Interpolation case! Don't consume those chars; they get saved for grammar.js.
                *symbol_result = RAW_STR_PART;
                state->ongoing_raw_str_hash_count = hash_count;
                return true;
            } else if (last_char == '"') {
                // The string is finished! Mark the end here, on the very last hash symbol.
                lexer->mark_end(lexer);
                *symbol_result = RAW_STR_END_PART;
                state->ongoing_raw_str_hash_count = 0;
                return true;
            }
            // Nothing special happened - let the string continue.
        }
    }
    // Ran into end of input while still inside the raw string.
    return false;
}
// Main entry point for the external scanner.
//
// Tries, in order: whitespace / explicit-or-implicit semicolons, block
// comments, operators (which may suppress a pending implicit semicolon),
// and finally raw-string parts / `#` compiler directives. On success the
// recognized token is stored in lexer->result_symbol and true is returned;
// returning false hands control back to the generated lexer.
bool tree_sitter_swift_external_scanner_scan(
    void *payload,
    TSLexer *lexer,
    const bool *valid_symbols
) {
    // Figure out our scanner state
    struct ScannerState *state = (struct ScannerState *)payload;
    // Consume any whitespace at the start.
    enum TokenType ws_result;
    enum ParseDirective ws_directive = eat_whitespace(lexer, valid_symbols, &ws_result);
    if (ws_directive == STOP_PARSING_TOKEN_FOUND) {
        lexer->result_symbol = ws_result;
        return true;
    }
    if (ws_directive == STOP_PARSING_NOTHING_FOUND || ws_directive == STOP_PARSING_END_OF_FILE) {
        return false;
    }
    // True when a newline produced a pending implicit semicolon that the
    // later stages may still suppress.
    bool has_ws_result = (ws_directive == CONTINUE_PARSING_TOKEN_FOUND);
    // Now consume comments (before custom operators so that those aren't treated as comments)
    enum TokenType comment_result;
    enum ParseDirective comment = ws_directive == CONTINUE_PARSING_SLASH_CONSUMED ? ws_directive : eat_comment(lexer, valid_symbols, /* mark_end */ true, &comment_result);
    if (comment == STOP_PARSING_TOKEN_FOUND) {
        lexer->mark_end(lexer);
        lexer->result_symbol = comment_result;
        return true;
    }
    if (comment == STOP_PARSING_END_OF_FILE) {
        return false;
    }
    // Now consume any operators that might cause our whitespace to be suppressed.
    // If a stray `/` was already consumed upstream, hand it to the operator
    // matcher as the prior character so it is not lost.
    enum TokenType operator_result;
    bool saw_operator = eat_operators(
        lexer,
        valid_symbols,
        /* mark_end */ !has_ws_result,
        comment == CONTINUE_PARSING_SLASH_CONSUMED ? '/' : '\0',
        &operator_result
    );
    // An operator beats a pending implicit semicolon only when there is no
    // pending semicolon, or the token is allowed to cross one
    // (is_cross_semi_token is defined earlier in this file).
    if (saw_operator && (!has_ws_result || is_cross_semi_token(operator_result))) {
        lexer->result_symbol = operator_result;
        if (has_ws_result) lexer->mark_end(lexer);
        return true;
    }
    if (has_ws_result) {
        // Don't `mark_end`, since we may have advanced through some operators.
        lexer->result_symbol = ws_result;
        return true;
    }
    // NOTE: this will consume any `#` characters it sees, even if it does not find a result. Keep
    // it at the end so that it doesn't interfere with special literals or selectors!
    enum TokenType raw_str_result;
    bool saw_raw_str_part = eat_raw_str_part(state, lexer, valid_symbols, &raw_str_result);
    if (saw_raw_str_part) {
        lexer->result_symbol = raw_str_result;
        return true;
    }
    return false;
}

View File

@@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Allow clients to override allocation functions
// When TREE_SITTER_REUSE_ALLOCATOR is defined, every ts_* macro routes
// through function pointers supplied by the embedding application;
// otherwise the macros fall straight through to the C standard library.
#ifdef TREE_SITTER_REUSE_ALLOCATOR

extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);

// Each name may also be pre-defined individually by the build system.
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif

#else

// Default: allocate directly with the C standard library.
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif

#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ALLOC_H_

View File

@@ -0,0 +1,290 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./alloc.h"

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// The helper macros in this header declare scratch locals that are not
// always read; silence the resulting "unused variable" warnings here and
// restore the settings at the bottom of the file.
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
/// Declares an anonymous struct type holding a growable contiguous array
/// of `T` elements, with size/capacity bookkeeping.
#define Array(T)       \
  struct {             \
    T *contents;       \
    uint32_t size;     \
    uint32_t capacity; \
  }

/// Initialize an array.
#define array_init(self) \
  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)

/// Create an empty array.
#define array_new() \
  { NULL, 0, 0 }

/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])

/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)

/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)

/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)

/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)

/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))

/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
  do { \
    if ((count) == 0) break; \
    _array__grow((Array *)(self), count, array_elem_size(self)); \
    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
    (self)->size += (count); \
  } while (0)

/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
  array_extend((self), (other)->size, (other)->contents)

/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), (self)->size, \
    0, count, contents \
  )

/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), _index, \
    old_count, new_count, new_contents \
  )

/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))

/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
  _array__erase((Array *)(self), array_elem_size(self), _index)

/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])

/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))

/// Swap one array with another
#define array_swap(self, other) \
  _array__swap((Array *)(self), (Array *)(other))

/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)

/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  _array__search_sorted(self, 0, compare, , needle, _index, _exists)

/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)

/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)
// Private

// Type-erased view of any Array(T); the public macros cast to this before
// calling the helpers below.
typedef Array(void) Array;

/// This is not what you're looking for, see `array_delete`.
///
/// Frees the backing buffer (if any) and resets the array to empty; the
/// `Array` struct itself is not freed.
static inline void _array__delete(Array *self) {
  if (self->contents) {
    ts_free(self->contents);
    self->contents = NULL;
    self->size = 0;
    self->capacity = 0;
  }
}
/// This is not what you're looking for, see `array_erase`.
///
/// Removes the element at `index`, shifting the tail of the array left by
/// one slot. Capacity is left untouched.
static inline void _array__erase(Array *self, size_t element_size,
                                 uint32_t index) {
  assert(index < self->size);
  char *base = (char *)self->contents;
  char *hole = base + index * element_size;
  // The source and destination overlap, so memmove is required here.
  memmove(hole, hole + element_size, (self->size - index - 1) * element_size);
  self->size -= 1;
}
/// This is not what you're looking for, see `array_reserve`.
///
/// Grows the backing buffer to hold at least `new_capacity` elements;
/// never shrinks.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) {
    return;
  }
  // Use ts_malloc for a brand-new buffer and ts_realloc otherwise, so a
  // custom allocator sees the conventional call for each situation.
  self->contents = self->contents
    ? ts_realloc(self->contents, new_capacity * element_size)
    : ts_malloc(new_capacity * element_size);
  self->capacity = new_capacity;
}
/// This is not what you're looking for, see `array_assign`.
///
/// Copies `other`'s elements into `self`, growing `self`'s buffer first
/// when needed.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  uint32_t count = other->size;
  _array__reserve(self, element_size, count);
  memcpy(self->contents, other->contents, count * element_size);
  self->size = count;
}
/// This is not what you're looking for, see `array_swap`.
///
/// Exchanges the complete state (buffer, size, capacity) of two arrays.
static inline void _array__swap(Array *self, Array *other) {
  Array tmp = *self;
  *self = *other;
  *other = tmp;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
///
/// Ensures room for `count` additional elements, doubling the capacity when
/// more space is needed (with a floor of 8 slots).
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t required = self->size + count;
  if (required <= self->capacity) {
    return;
  }
  uint32_t doubled = self->capacity * 2;
  if (doubled < 8) {
    doubled = 8;
  }
  _array__reserve(self, element_size, doubled > required ? doubled : required);
}
/// This is not what you're looking for, see `array_splice`.
///
/// Replaces `old_count` elements at `index` with `new_count` elements read
/// from `elements`; when `elements` is NULL the inserted slots are
/// zero-filled instead.
static inline void _array__splice(Array *self, size_t element_size,
                                  uint32_t index, uint32_t old_count,
                                  uint32_t new_count, const void *elements) {
  uint32_t new_size = self->size + new_count - old_count;
  uint32_t old_end = index + old_count;
  uint32_t new_end = index + new_count;
  assert(old_end <= self->size);

  _array__reserve(self, element_size, new_size);

  char *contents = (char *)self->contents;
  // Shift the tail (everything after the replaced span) into position; the
  // regions may overlap, hence memmove.
  if (self->size > old_end) {
    memmove(
      contents + new_end * element_size,
      contents + old_end * element_size,
      (self->size - old_end) * element_size
    );
  }
  if (new_count > 0) {
    if (elements) {
      memcpy(
        (contents + index * element_size),
        elements,
        new_count * element_size
      );
    } else {
      memset(
        (contents + index * element_size),
        0,
        new_count * element_size
      );
    }
  }
  self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// On exit, `*_index` is the position of the matching element (when
/// `*_exists` is true) or the insertion point that keeps the array sorted.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)

/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
/// NOTE(review): the subtraction assumes both values fit comfortably in `int`
/// (extreme magnitudes would overflow) -- confirm for any new call site.
#define _compare_int(a, b) ((int)*(a) - (int)(b))

// Restore the warning settings changed at the top of this header.
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ARRAY_H_

View File

@@ -0,0 +1,266 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Fixed size of the buffer passed to external-scanner serialize callbacks.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

// These are normally provided by the public tree-sitter API header; declare
// them here only when a grammar is compiled standalone.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif

typedef struct {
  TSFieldId field_id;
  uint8_t child_index;
  bool inherited;
} TSFieldMapEntry;

typedef struct {
  uint16_t index;
  uint16_t length;
} TSFieldMapSlice;

typedef struct {
  bool visible;
  bool named;
  bool supertype;
} TSSymbolMetadata;

typedef struct TSLexer TSLexer;

// Interface the runtime hands to lexing code, including external scanners
// such as this grammar's scanner.c.
struct TSLexer {
  // Next code point to be consumed; scanner code treats '\0' as end of input.
  int32_t lookahead;
  TSSymbol result_symbol;
  // Consume the lookahead; the bool marks the character as skipped
  // (excluded from the token) when true.
  void (*advance)(TSLexer *, bool);
  // Commit the current position as the token's end.
  void (*mark_end)(TSLexer *);
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
  void (*log)(const TSLexer *, const char *, ...);
};

typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;

typedef union {
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  uint8_t type;
} TSParseAction;

typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;

typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;

// Inclusive range of code points, used by generated character-set tables
// (see `set_contains` below).
typedef struct {
  int32_t start;
  int32_t end;
} TSCharacterRange;
// Complete description of a generated language: symbol and field metadata,
// the parse/lex tables, and the external-scanner hooks. Instances are
// emitted by `tree-sitter generate` and read directly by the runtime, so
// this layout must not be edited by hand.
struct TSLanguage {
  uint32_t version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;
  const TSFieldMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  // Callbacks implemented in scanner.c for tokens the generated lexer
  // cannot handle on its own.
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
};
// Binary-search a sorted table of inclusive code-point ranges for
// `lookahead`; returns true when some range contains it.
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  uint32_t lo = 0;
  uint32_t remaining = len;
  while (remaining > 1) {
    uint32_t half = remaining / 2;
    uint32_t mid = lo + half;
    TSCharacterRange *candidate = &ranges[mid];
    if (lookahead >= candidate->start && lookahead <= candidate->end) {
      return true;
    }
    // Keep the lower half only when the probe falls below the candidate;
    // otherwise shift the window up to the midpoint.
    if (lookahead > candidate->end) {
      lo = mid;
    }
    remaining -= half;
  }
  TSCharacterRange *last = &ranges[lo];
  return lookahead >= last->start && lookahead <= last->end;
}
/*
 * Lexer Macros
 */

#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif

// Declares the locals and the `start`/`next_state` labels that the
// ADVANCE/SKIP/ACCEPT_TOKEN macros below jump between.
#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  UNUSED                        \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;

#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }

// Table-driven variant of ADVANCE: pairs of (lookahead, next state).
#define ADVANCE_MAP(...)                                              \
  {                                                                   \
    static const uint16_t map[] = { __VA_ARGS__ };                    \
    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \
      if (map[i] == lookahead) {                                      \
        state = map[i + 1];                                           \
        goto next_state;                                              \
      }                                                               \
    }                                                                 \
  }

#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }

#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

#define END_STATE() return result;

/*
 * Parse Table Macros
 */

#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

#define STATE(id) id

#define ACTIONS(id) id

#define SHIFT(state_value)            \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = (state_value)          \
    }                                 \
  }}

#define SHIFT_REPEAT(state_value)     \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = (state_value),         \
      .repetition = true              \
    }                                 \
  }}

#define SHIFT_EXTRA()                 \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .extra = true                   \
    }                                 \
  }}

#define REDUCE(symbol_name, children, precedence, prod_id) \
  {{                                                       \
    .reduce = {                                            \
      .type = TSParseActionTypeReduce,                     \
      .symbol = symbol_name,                               \
      .child_count = children,                             \
      .dynamic_precedence = precedence,                    \
      .production_id = prod_id                             \
    },                                                     \
  }}

#define RECOVER()                    \
  {{                                 \
    .type = TSParseActionTypeRecover \
  }}

#define ACCEPT_INPUT()              \
  {{                                \
    .type = TSParseActionTypeAccept \
  }}

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_PARSER_H_

View File

@@ -0,0 +1,39 @@
{
"grammars": [
{
"name": "swift",
"camelcase": "Swift",
"scope": "source.swift",
"path": ".",
"file-types": [
"swift"
],
"highlights": "queries/highlights.scm",
"injections": "queries/injections.scm",
"locals": "queries/locals.scm",
"injection-regex": "swift"
}
],
"metadata": {
"version": "0.7.2",
"license": "MIT",
"description": "A tree-sitter grammar for the Swift programming language.",
"authors": [
{
"name": "Alex Pinkus",
"email": "alex.pinkus@gmail.com"
}
],
"links": {
"repository": "git+https://github.com/alex-pinkus/tree-sitter-swift.git"
}
},
"bindings": {
"c": true,
"go": true,
"node": true,
"python": true,
"rust": true,
"swift": true
}
}