mirror of
https://github.com/github/codeql.git
synced 2026-05-14 03:09:26 +02:00
unified: vendor in tree-sitter-swift
This commit is contained in:
40
unified/extractor/tree-sitter-swift/BUILD.bazel
Normal file
40
unified/extractor/tree-sitter-swift/BUILD.bazel
Normal file
@@ -0,0 +1,40 @@
|
||||
load("@rules_rust//cargo:defs.bzl", "cargo_build_script")
|
||||
load("@rules_rust//rust:defs.bzl", "rust_library")
|
||||
load("//misc/bazel/3rdparty/tree_sitter_extractors_deps:defs.bzl", "aliases", "all_crate_deps")
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
# This will run the build script from the root of the workspace, and
|
||||
# collect the outputs.
|
||||
cargo_build_script(
|
||||
name = "tree-sitter-swift-build",
|
||||
srcs = ["bindings/rust/build.rs"],
|
||||
data = glob([
|
||||
"src/**",
|
||||
]),
|
||||
deps = all_crate_deps(
|
||||
build = True,
|
||||
),
|
||||
)
|
||||
|
||||
rust_library(
|
||||
name = "tree-sitter-swift",
|
||||
srcs = [
|
||||
"bindings/rust/lib.rs",
|
||||
],
|
||||
aliases = aliases(),
|
||||
compile_data = glob([
|
||||
"src/**",
|
||||
"queries/**",
|
||||
]) + [
|
||||
"grammar.js",
|
||||
],
|
||||
proc_macro_deps = all_crate_deps(
|
||||
proc_macro = True,
|
||||
),
|
||||
deps = [":tree-sitter-swift-build"] + all_crate_deps(
|
||||
normal = True,
|
||||
),
|
||||
)
|
||||
|
||||
exports_files(["Cargo.toml"])
|
||||
21
unified/extractor/tree-sitter-swift/Cargo.toml
Normal file
21
unified/extractor/tree-sitter-swift/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[package]
|
||||
name = "tree-sitter-swift"
|
||||
description = "Swift grammar for the tree-sitter parsing library (vendored copy for the unified extractor)"
|
||||
version = "0.7.2"
|
||||
keywords = ["incremental", "parsing", "swift"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
repository = "https://github.com/alex-pinkus/tree-sitter-swift"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
|
||||
build = "bindings/rust/build.rs"
|
||||
|
||||
[lib]
|
||||
path = "bindings/rust/lib.rs"
|
||||
|
||||
# When updating these dependencies, run `misc/bazel/3rdparty/update_cargo_deps.sh`
|
||||
[dependencies]
|
||||
tree-sitter-language = "0.1"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.2"
|
||||
21
unified/extractor/tree-sitter-swift/LICENSE
Normal file
21
unified/extractor/tree-sitter-swift/LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 alex-pinkus
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
127
unified/extractor/tree-sitter-swift/README.md
Normal file
127
unified/extractor/tree-sitter-swift/README.md
Normal file
@@ -0,0 +1,127 @@
|
||||

|
||||
[](https://crates.io/crates/tree-sitter-swift)
|
||||
[](https://www.npmjs.com/package/tree-sitter-swift)
|
||||
[](https://github.com/alex-pinkus/tree-sitter-swift/actions/workflows/top-repos.yml)
|
||||
|
||||
# tree-sitter-swift
|
||||
|
||||
This contains a [`tree-sitter`](https://tree-sitter.github.io/tree-sitter) grammar for the Swift programming language.
|
||||
|
||||
## Getting started
|
||||
|
||||
To use this parser to parse Swift code, you'll want to depend on either the Rust crate or the NPM package.
|
||||
|
||||
### Rust
|
||||
|
||||
To use the Rust crate, you'll add this to your `Cargo.toml`:
|
||||
|
||||
```
|
||||
tree-sitter = "0.23.0"
|
||||
tree-sitter-swift = "=0.7.0"
|
||||
```
|
||||
|
||||
Then you can use a `tree-sitter` parser with the language declared here:
|
||||
|
||||
```
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser.set_language(tree_sitter_swift::language())?;
|
||||
|
||||
// ...
|
||||
|
||||
let tree = parser.parse(&my_source_code, None)
|
||||
.ok_or_else(|| /* error handling code */)?;
|
||||
```
|
||||
|
||||
### Javascript
|
||||
|
||||
To use this from NPM, you'll add similar dependencies to `package.json`:
|
||||
|
||||
```
|
||||
"dependencies: {
|
||||
"tree-sitter-swift": "0.7.0",
|
||||
"tree-sitter": "^0.22.1"
|
||||
}
|
||||
```
|
||||
|
||||
Your usage of the parser will look like:
|
||||
|
||||
```
|
||||
const Parser = require("tree-sitter");
|
||||
const Swift = require("tree-sitter-swift");
|
||||
|
||||
const parser = new Parser();
|
||||
parser.setLanguage(Swift);
|
||||
|
||||
// ...
|
||||
|
||||
const tree = parser.parse(mySourceCode);
|
||||
```
|
||||
|
||||
### Editing the grammar
|
||||
|
||||
With this package checked out, a common workflow for editing the grammar will look something like:
|
||||
|
||||
1. Make a change to `grammar.ts`.
|
||||
2. Run `npm install && npm test` to see whether the change has had impact on existing parsing behavior. The default
|
||||
`npm test` target requires `valgrind` to be installed; if you do not have it installed, and do not wish to, you can
|
||||
substitute `tree-sitter test` directly.
|
||||
3. Run `tree-sitter parse` on some real Swift codebase and see whether (or where) it fails.
|
||||
4. Use any failures to create new corpus test cases.
|
||||
|
||||
## Contributions
|
||||
|
||||
All contributions to this repository are welcome.
|
||||
|
||||
If said contribution is to check generated files (e.g., `parser.c`) into the repository, be aware that your contribution will not be accepted. Make sure to read the [FAQ entry](https://github.com/alex-pinkus/tree-sitter-swift?tab=readme-ov-file#where-is-your-parserc) and the [prior](https://github.com/alex-pinkus/tree-sitter-swift/issues/362) [discussions](https://github.com/alex-pinkus/tree-sitter-swift/pull/315) and [compromises](https://github.com/alex-pinkus/tree-sitter-swift/issues/149) that have occurred already on this topic.
|
||||
|
||||
## Using tree-sitter-swift in Web Assembly
|
||||
|
||||
To use tree-sitter-swift as a language for the web bindings version tree-sitter, which will likely be a more modern version than the published node
|
||||
module. [see](https://github.com/tree-sitter/tree-sitter/blob/master/lib/binding_web/README.md). Follow the instructions below
|
||||
|
||||
1. Install the node modules `npm install web-tree-sitter tree-sitter-swift`
|
||||
2. Run the tree-sitter cli to create the wasm bundle
|
||||
```sh
|
||||
$ npx tree-sitter build-asm ./node_modules/tree-sitter
|
||||
```
|
||||
3. Boot tree-sitter wasm like this.
|
||||
|
||||
```js
|
||||
const Parser = require("web-tree-sitter");
|
||||
async function run() {
|
||||
//needs to happen first
|
||||
await Parser.init();
|
||||
//wait for the load of swift
|
||||
const Swift = await Parser.Language.load("./tree-sitter-swift.wasm");
|
||||
|
||||
const parser = new Parser();
|
||||
parser.setLanguage(Swift);
|
||||
|
||||
//Parse your swift code here.
|
||||
const tree = parser.parse('print("Hello, World!")');
|
||||
}
|
||||
//if you want to run this
|
||||
run().then(console.log, console.error);
|
||||
```
|
||||
|
||||
## Frequently asked questions
|
||||
|
||||
### Where is your `parser.c`?
|
||||
|
||||
This repository currently omits most of the code that is autogenerated during a build. This means, for instance, that
|
||||
`grammar.json` and `parser.c` are both only available following a build. It also significantly reduces noise during
|
||||
diffs.
|
||||
|
||||
The side benefit of not checking in `parser.c` is that you can guarantee backwards compatibility. Parsers generated by
|
||||
the tree-sitter CLI aren't always backwards compatible. If you need a parser, generate it yourself using the CLI; all
|
||||
the information to do so is available in this package. By doing that, you'll also know for sure that your parser version
|
||||
and your library version are compatible.
|
||||
|
||||
If you need a `parser.c`, and you don't care about the tree-sitter version, but you don't have a local setup that would
|
||||
allow you to obtain the parser, you can just download one from a recent workflow run in this package. To do so:
|
||||
|
||||
- Go to the [GitHub actions page](https://github.com/alex-pinkus/tree-sitter-swift/actions) for this
|
||||
repository.
|
||||
- Click on the "Publish `grammar.json` and `parser.c`" action for the appropriate commit.
|
||||
- Go down to `Artifacts` and click on `generated-parser-src`. All the relevant parser files will be available in your
|
||||
download.
|
||||
44
unified/extractor/tree-sitter-swift/binding.gyp
Normal file
44
unified/extractor/tree-sitter-swift/binding.gyp
Normal file
@@ -0,0 +1,44 @@
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "tree_sitter_swift_binding",
|
||||
"dependencies": [
|
||||
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
|
||||
],
|
||||
"include_dirs": [
|
||||
"src",
|
||||
],
|
||||
"sources": [
|
||||
"bindings/node/binding.cc",
|
||||
"src/parser.c",
|
||||
# NOTE: if your language has an external scanner, add it here.
|
||||
"src/scanner.c",
|
||||
],
|
||||
"cflags_c": [
|
||||
"-std=c11",
|
||||
],
|
||||
"actions": [
|
||||
{
|
||||
"action_name": "wait_for_tree_sitter",
|
||||
"action": ["node", "scripts/wait-for-tree-sitter.js"],
|
||||
"inputs": [],
|
||||
"outputs": ["node_modules/tree-sitter-cli"]
|
||||
},
|
||||
{
|
||||
"action_name": "generate_header_files",
|
||||
"inputs": [
|
||||
"grammar.js",
|
||||
"node_modules/tree-sitter-cli"
|
||||
],
|
||||
"outputs": [
|
||||
"src/grammar.json",
|
||||
"src/node-types.json",
|
||||
"src/parser.c",
|
||||
"src/tree_sitter",
|
||||
],
|
||||
"action": ["tree-sitter", "generate", "--no-bindings"],
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
20
unified/extractor/tree-sitter-swift/bindings/node/binding.cc
Normal file
20
unified/extractor/tree-sitter-swift/bindings/node/binding.cc
Normal file
@@ -0,0 +1,20 @@
|
||||
#include <napi.h>
|
||||
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
|
||||
extern "C" TSLanguage *tree_sitter_swift();
|
||||
|
||||
// "tree-sitter", "language" hashed with BLAKE2
|
||||
const napi_type_tag LANGUAGE_TYPE_TAG = {
|
||||
0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
|
||||
};
|
||||
|
||||
Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
||||
exports["name"] = Napi::String::New(env, "swift");
|
||||
auto language = Napi::External<TSLanguage>::New(env, tree_sitter_swift());
|
||||
language.TypeTag(&LANGUAGE_TYPE_TAG);
|
||||
exports["language"] = language;
|
||||
return exports;
|
||||
}
|
||||
|
||||
NODE_API_MODULE(tree_sitter_swift_binding, Init)
|
||||
@@ -0,0 +1,7 @@
|
||||
const root = require("path").join(__dirname, "..", "..");
|
||||
|
||||
module.exports = require("node-gyp-build")(root);
|
||||
|
||||
try {
|
||||
module.exports.nodeTypeInfo = require("../../src/node-types.json");
|
||||
} catch (_) {}
|
||||
19
unified/extractor/tree-sitter-swift/bindings/rust/build.rs
Normal file
19
unified/extractor/tree-sitter-swift/bindings/rust/build.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
fn main() {
|
||||
let src_dir = std::path::Path::new("src");
|
||||
|
||||
let mut c_config = cc::Build::new();
|
||||
c_config.std("c11").include(src_dir);
|
||||
|
||||
#[cfg(target_env = "msvc")]
|
||||
c_config.flag("-utf-8");
|
||||
|
||||
let parser_path = src_dir.join("parser.c");
|
||||
c_config.file(&parser_path);
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
|
||||
let scanner_path = src_dir.join("scanner.c");
|
||||
c_config.file(&scanner_path);
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
|
||||
c_config.compile("tree-sitter-swift");
|
||||
}
|
||||
68
unified/extractor/tree-sitter-swift/bindings/rust/lib.rs
Normal file
68
unified/extractor/tree-sitter-swift/bindings/rust/lib.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
//! This crate provides Swift language support for the [tree-sitter][] parsing library.
|
||||
//!
|
||||
//! Typically, you will use the [language][language func] function to add this language to a
|
||||
//! tree-sitter [Parser][], and then use the parser to parse some code:
|
||||
//!
|
||||
//! ```
|
||||
//! let code = r#"
|
||||
//! "#;
|
||||
//! let mut parser = tree_sitter::Parser::new();
|
||||
//! let language = tree_sitter_swift::LANGUAGE;
|
||||
//! parser
|
||||
//! .set_language(&language.into())
|
||||
//! .expect("Error loading Swift parser");
|
||||
//! let tree = parser.parse(code, None).unwrap();
|
||||
//! assert!(!tree.root_node().has_error());
|
||||
//! ```
|
||||
//!
|
||||
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
|
||||
//! [language func]: fn.language.html
|
||||
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
|
||||
//! [tree-sitter]: https://tree-sitter.github.io/
|
||||
|
||||
use tree_sitter_language::LanguageFn;
|
||||
|
||||
unsafe extern "C" {
|
||||
fn tree_sitter_swift() -> *const ();
|
||||
}
|
||||
|
||||
/// The tree-sitter [`LanguageFn`] for this grammar.
|
||||
pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_swift) };
|
||||
|
||||
/// The content of the [`node-types.json`][] file for this grammar.
|
||||
///
|
||||
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
|
||||
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
|
||||
|
||||
pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
|
||||
pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
|
||||
pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
|
||||
pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn test_can_load_grammar() {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&super::LANGUAGE.into())
|
||||
.expect("Error loading Swift parser");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_can_parse_basic_file() {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(&super::LANGUAGE.into())
|
||||
.expect("Error loading Swift parser");
|
||||
|
||||
let tree = parser
|
||||
.parse("_ = \"Hello!\"\n", None)
|
||||
.expect("Unable to parse!");
|
||||
|
||||
assert_eq!(
|
||||
"(source_file (assignment target: (directly_assignable_expression (simple_identifier)) result: (line_string_literal text: (line_str_text))))",
|
||||
tree.root_node().to_sexp(),
|
||||
);
|
||||
}
|
||||
}
|
||||
2109
unified/extractor/tree-sitter-swift/grammar.js
Normal file
2109
unified/extractor/tree-sitter-swift/grammar.js
Normal file
File diff suppressed because it is too large
Load Diff
68
unified/extractor/tree-sitter-swift/package.json
Normal file
68
unified/extractor/tree-sitter-swift/package.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"name": "tree-sitter-swift",
|
||||
"version": "0.7.2",
|
||||
"description": "A tree-sitter grammar for the Swift programming language.",
|
||||
"main": "bindings/node/index.js",
|
||||
"types": "bindings/node",
|
||||
"scripts": {
|
||||
"install": "node-gyp-build",
|
||||
"prestart": "tree-sitter build --wasm",
|
||||
"start": "tree-sitter playground",
|
||||
"test": "node --test bindings/node/*_test.js"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/alex-pinkus/tree-sitter-swift.git"
|
||||
},
|
||||
"tree-sitter": [
|
||||
{
|
||||
"scope": "source.swift",
|
||||
"file-types": [
|
||||
"swift"
|
||||
],
|
||||
"injection-regex": "swift",
|
||||
"highlights": "queries/highlights.scm",
|
||||
"locals": "queries/locals.scm",
|
||||
"injections": "queries/injections.scm"
|
||||
}
|
||||
],
|
||||
"keywords": [
|
||||
"parser",
|
||||
"swift"
|
||||
],
|
||||
"files": [
|
||||
"grammar.js",
|
||||
"tree-sitter.json",
|
||||
"binding.gyp",
|
||||
"prebuilds/**",
|
||||
"bindings/node/*",
|
||||
"queries/*",
|
||||
"scripts/*",
|
||||
"src/**"
|
||||
],
|
||||
"author": "Alex Pinkus <alex.pinkus@gmail.com>",
|
||||
"license": "MIT",
|
||||
"bugs": {
|
||||
"url": "https://github.com/alex-pinkus/tree-sitter-swift/issues"
|
||||
},
|
||||
"homepage": "https://github.com/alex-pinkus/tree-sitter-swift#readme",
|
||||
"dependencies": {
|
||||
"node-addon-api": "^8.0.0",
|
||||
"node-gyp-build": "^4.8.0",
|
||||
"tree-sitter-cli": "^0.23",
|
||||
"which": "2.0.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"tree-sitter": "^0.22.1"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"tree_sitter": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"devDependencies": {
|
||||
"node-gyp": "^10.0.1",
|
||||
"prebuildify": "^6.0.0",
|
||||
"prettier": "2.3.2"
|
||||
}
|
||||
}
|
||||
35
unified/extractor/tree-sitter-swift/queries/folds.scm
Normal file
35
unified/extractor/tree-sitter-swift/queries/folds.scm
Normal file
@@ -0,0 +1,35 @@
|
||||
; format-ignore
|
||||
[
|
||||
(protocol_body) ; protocol Foo { ... }
|
||||
(class_body) ; class Foo { ... }
|
||||
(enum_class_body) ; enum Foo { ... }
|
||||
(function_body) ; func Foo (...) {...}
|
||||
(computed_property) ; { ... }
|
||||
|
||||
(computed_getter) ; get { ... }
|
||||
(computed_setter) ; set { ... }
|
||||
|
||||
(do_statement)
|
||||
(if_statement)
|
||||
(for_statement)
|
||||
(switch_statement)
|
||||
(while_statement)
|
||||
(guard_statement)
|
||||
(switch_entry)
|
||||
|
||||
(type_parameters) ; x<Foo>
|
||||
(tuple_type) ; (...)
|
||||
(array_type) ; [String]
|
||||
(dictionary_type) ; [Foo: Bar]
|
||||
|
||||
(call_expression) ; callFunc(...)
|
||||
(tuple_expression) ; ( foo + bar )
|
||||
(array_literal) ; [ foo, bar ]
|
||||
(dictionary_literal) ; [ foo: bar, x: y ]
|
||||
(lambda_literal)
|
||||
(willset_didset_block)
|
||||
(willset_clause)
|
||||
(didset_clause)
|
||||
|
||||
(import_declaration)+
|
||||
] @fold
|
||||
336
unified/extractor/tree-sitter-swift/queries/highlights.scm
Normal file
336
unified/extractor/tree-sitter-swift/queries/highlights.scm
Normal file
@@ -0,0 +1,336 @@
|
||||
[
|
||||
"."
|
||||
";"
|
||||
":"
|
||||
","
|
||||
] @punctuation.delimiter
|
||||
|
||||
[
|
||||
"("
|
||||
")"
|
||||
"["
|
||||
"]"
|
||||
"{"
|
||||
"}"
|
||||
] @punctuation.bracket
|
||||
|
||||
; Identifiers
|
||||
(type_identifier) @type
|
||||
|
||||
[
|
||||
(self_expression)
|
||||
(super_expression)
|
||||
] @variable.builtin
|
||||
|
||||
; Declarations
|
||||
[
|
||||
"func"
|
||||
"deinit"
|
||||
] @keyword.function
|
||||
|
||||
[
|
||||
(visibility_modifier)
|
||||
(member_modifier)
|
||||
(function_modifier)
|
||||
(property_modifier)
|
||||
(parameter_modifier)
|
||||
(inheritance_modifier)
|
||||
(mutation_modifier)
|
||||
] @keyword.modifier
|
||||
|
||||
(simple_identifier) @variable
|
||||
|
||||
(function_declaration
|
||||
(simple_identifier) @function.method)
|
||||
|
||||
(protocol_function_declaration
|
||||
name: (simple_identifier) @function.method)
|
||||
|
||||
(init_declaration
|
||||
"init" @constructor)
|
||||
|
||||
(parameter
|
||||
external_name: (simple_identifier) @variable.parameter)
|
||||
|
||||
(parameter
|
||||
name: (simple_identifier) @variable.parameter)
|
||||
|
||||
(type_parameter
|
||||
(type_identifier) @variable.parameter)
|
||||
|
||||
(inheritance_constraint
|
||||
(identifier
|
||||
(simple_identifier) @variable.parameter))
|
||||
|
||||
(equality_constraint
|
||||
(identifier
|
||||
(simple_identifier) @variable.parameter))
|
||||
|
||||
[
|
||||
"protocol"
|
||||
"extension"
|
||||
"indirect"
|
||||
"nonisolated"
|
||||
"override"
|
||||
"convenience"
|
||||
"required"
|
||||
"some"
|
||||
"any"
|
||||
"weak"
|
||||
"unowned"
|
||||
"didSet"
|
||||
"willSet"
|
||||
"subscript"
|
||||
"let"
|
||||
"var"
|
||||
(throws)
|
||||
(where_keyword)
|
||||
(getter_specifier)
|
||||
(setter_specifier)
|
||||
(modify_specifier)
|
||||
(else)
|
||||
(as_operator)
|
||||
] @keyword
|
||||
|
||||
[
|
||||
"enum"
|
||||
"struct"
|
||||
"class"
|
||||
"typealias"
|
||||
] @keyword.type
|
||||
|
||||
[
|
||||
"async"
|
||||
"await"
|
||||
] @keyword.coroutine
|
||||
|
||||
(shebang_line) @keyword.directive
|
||||
|
||||
(class_body
|
||||
(property_declaration
|
||||
(pattern
|
||||
(simple_identifier) @variable.member)))
|
||||
|
||||
(protocol_property_declaration
|
||||
(pattern
|
||||
(simple_identifier) @variable.member))
|
||||
|
||||
(navigation_expression
|
||||
(navigation_suffix
|
||||
(simple_identifier) @variable.member))
|
||||
|
||||
(value_argument
|
||||
name: (value_argument_label
|
||||
(simple_identifier) @variable.member))
|
||||
|
||||
(import_declaration
|
||||
"import" @keyword.import)
|
||||
|
||||
(enum_entry
|
||||
"case" @keyword)
|
||||
|
||||
(modifiers
|
||||
(attribute
|
||||
"@" @attribute
|
||||
(user_type
|
||||
(type_identifier) @attribute)))
|
||||
|
||||
; Function calls
|
||||
(call_expression
|
||||
(simple_identifier) @function.call) ; foo()
|
||||
|
||||
(call_expression
|
||||
; foo.bar.baz(): highlight the baz()
|
||||
(navigation_expression
|
||||
(navigation_suffix
|
||||
(simple_identifier) @function.call)))
|
||||
|
||||
(call_expression
|
||||
(prefix_expression
|
||||
(simple_identifier) @function.call)) ; .foo()
|
||||
|
||||
((navigation_expression
|
||||
(simple_identifier) @type) ; SomeType.method(): highlight SomeType as a type
|
||||
(#match? @type "^[A-Z]"))
|
||||
|
||||
(directive) @keyword.directive
|
||||
|
||||
; See https://docs.swift.org/swift-book/documentation/the-swift-programming-language/lexicalstructure/#Keywords-and-Punctuation
|
||||
[
|
||||
(diagnostic)
|
||||
(availability_condition)
|
||||
(playground_literal)
|
||||
(key_path_string_expression)
|
||||
(selector_expression)
|
||||
(external_macro_definition)
|
||||
] @function.macro
|
||||
|
||||
(special_literal) @constant.macro
|
||||
|
||||
; Statements
|
||||
(for_statement
|
||||
"for" @keyword.repeat)
|
||||
|
||||
(for_statement
|
||||
"in" @keyword.repeat)
|
||||
|
||||
[
|
||||
"while"
|
||||
"repeat"
|
||||
"continue"
|
||||
"break"
|
||||
] @keyword.repeat
|
||||
|
||||
(guard_statement
|
||||
"guard" @keyword.conditional)
|
||||
|
||||
(if_statement
|
||||
"if" @keyword.conditional)
|
||||
|
||||
(switch_statement
|
||||
"switch" @keyword.conditional)
|
||||
|
||||
(switch_entry
|
||||
"case" @keyword)
|
||||
|
||||
(switch_entry
|
||||
"fallthrough" @keyword)
|
||||
|
||||
(switch_entry
|
||||
(default_keyword) @keyword)
|
||||
|
||||
"return" @keyword.return
|
||||
|
||||
(ternary_expression
|
||||
[
|
||||
"?"
|
||||
":"
|
||||
] @keyword.conditional.ternary)
|
||||
|
||||
[
|
||||
(try_operator)
|
||||
"do"
|
||||
(throw_keyword)
|
||||
(catch_keyword)
|
||||
] @keyword.exception
|
||||
|
||||
(statement_label) @label
|
||||
|
||||
; Comments
|
||||
[
|
||||
(comment)
|
||||
(multiline_comment)
|
||||
] @comment @spell
|
||||
|
||||
((comment) @comment.documentation
|
||||
(#match? @comment.documentation "^///[^/]"))
|
||||
|
||||
((comment) @comment.documentation
|
||||
(#match? @comment.documentation "^///$"))
|
||||
|
||||
((multiline_comment) @comment.documentation
|
||||
(#match? @comment.documentation "^/[*][*][^*].*[*]/$"))
|
||||
|
||||
; String literals
|
||||
(line_str_text) @string
|
||||
|
||||
(str_escaped_char) @string.escape
|
||||
|
||||
(multi_line_str_text) @string
|
||||
|
||||
(raw_str_part) @string
|
||||
|
||||
(raw_str_end_part) @string
|
||||
|
||||
(line_string_literal
|
||||
[
|
||||
"\\("
|
||||
")"
|
||||
] @punctuation.special)
|
||||
|
||||
(multi_line_string_literal
|
||||
[
|
||||
"\\("
|
||||
")"
|
||||
] @punctuation.special)
|
||||
|
||||
(raw_str_interpolation
|
||||
[
|
||||
(raw_str_interpolation_start)
|
||||
")"
|
||||
] @punctuation.special)
|
||||
|
||||
[
|
||||
"\""
|
||||
"\"\"\""
|
||||
] @string
|
||||
|
||||
; Lambda literals
|
||||
(lambda_literal
|
||||
"in" @keyword.operator)
|
||||
|
||||
; Basic literals
|
||||
[
|
||||
(integer_literal)
|
||||
(hex_literal)
|
||||
(oct_literal)
|
||||
(bin_literal)
|
||||
] @number
|
||||
|
||||
(real_literal) @number.float
|
||||
|
||||
(boolean_literal) @boolean
|
||||
|
||||
"nil" @constant.builtin
|
||||
|
||||
(wildcard_pattern) @character.special
|
||||
|
||||
; Regex literals
|
||||
(regex_literal) @string.regexp
|
||||
|
||||
; Operators
|
||||
(custom_operator) @operator
|
||||
|
||||
[
|
||||
"+"
|
||||
"-"
|
||||
"*"
|
||||
"/"
|
||||
"%"
|
||||
"="
|
||||
"+="
|
||||
"-="
|
||||
"*="
|
||||
"/="
|
||||
"<"
|
||||
">"
|
||||
"<<"
|
||||
">>"
|
||||
"<="
|
||||
">="
|
||||
"++"
|
||||
"--"
|
||||
"^"
|
||||
"&"
|
||||
"&&"
|
||||
"|"
|
||||
"||"
|
||||
"~"
|
||||
"%="
|
||||
"!="
|
||||
"!=="
|
||||
"=="
|
||||
"==="
|
||||
"?"
|
||||
"??"
|
||||
"->"
|
||||
"..<"
|
||||
"..."
|
||||
(bang)
|
||||
] @operator
|
||||
|
||||
(type_arguments
|
||||
[
|
||||
"<"
|
||||
">"
|
||||
] @punctuation.bracket)
|
||||
123
unified/extractor/tree-sitter-swift/queries/indents.scm
Normal file
123
unified/extractor/tree-sitter-swift/queries/indents.scm
Normal file
@@ -0,0 +1,123 @@
|
||||
; format-ignore
|
||||
[
|
||||
; ... refers to the section that will get affected by this indent.begin capture
|
||||
(protocol_body) ; protocol Foo { ... }
|
||||
(class_body) ; class Foo { ... }
|
||||
(enum_class_body) ; enum Foo { ... }
|
||||
(function_declaration) ; func Foo (...) {...}
|
||||
(init_declaration) ; init(...) {...}
|
||||
(deinit_declaration) ; deinit {...}
|
||||
(computed_property) ; { ... }
|
||||
(subscript_declaration) ; subscript Foo(...) { ... }
|
||||
|
||||
(computed_getter) ; get { ... }
|
||||
(computed_setter) ; set { ... }
|
||||
|
||||
(assignment) ; a = b
|
||||
|
||||
(control_transfer_statement) ; return ...
|
||||
(for_statement)
|
||||
(while_statement)
|
||||
(repeat_while_statement)
|
||||
(do_statement)
|
||||
(if_statement)
|
||||
(switch_statement)
|
||||
(guard_statement)
|
||||
|
||||
(type_parameters) ; x<Foo>
|
||||
(tuple_type) ; (...)
|
||||
(array_type) ; [String]
|
||||
(dictionary_type) ; [Foo: Bar]
|
||||
|
||||
(call_expression) ; callFunc(...)
|
||||
(tuple_expression) ; ( foo + bar )
|
||||
(array_literal) ; [ foo, bar ]
|
||||
(dictionary_literal) ; [ foo: bar, x: y ]
|
||||
(lambda_literal)
|
||||
(willset_didset_block)
|
||||
(willset_clause)
|
||||
(didset_clause)
|
||||
] @indent.begin
|
||||
|
||||
(init_declaration) @indent.begin
|
||||
|
||||
(init_declaration
|
||||
[
|
||||
"init"
|
||||
"("
|
||||
] @indent.branch)
|
||||
|
||||
; indentation for init parameters
|
||||
(init_declaration
|
||||
")" @indent.branch @indent.end)
|
||||
|
||||
(init_declaration
|
||||
(parameter) @indent.begin
|
||||
(#set! indent.immediate))
|
||||
|
||||
; @something(...)
|
||||
(modifiers
|
||||
(attribute) @indent.begin)
|
||||
|
||||
(function_declaration
|
||||
(modifiers
|
||||
.
|
||||
(attribute)
|
||||
(_)* @indent.branch)
|
||||
.
|
||||
_ @indent.branch
|
||||
(#not-kind-eq? @indent.branch "type_parameters" "parameter"))
|
||||
|
||||
(ERROR
|
||||
[
|
||||
"<"
|
||||
"{"
|
||||
"("
|
||||
"["
|
||||
]) @indent.begin
|
||||
|
||||
; if-elseif
|
||||
(if_statement
|
||||
(if_statement) @indent.dedent)
|
||||
|
||||
; case Foo:
|
||||
; default Foo:
|
||||
; @attribute default Foo:
|
||||
(switch_entry
|
||||
.
|
||||
_ @indent.branch)
|
||||
|
||||
(function_declaration
|
||||
")" @indent.branch)
|
||||
|
||||
(type_parameters
|
||||
">" @indent.branch @indent.end .)
|
||||
|
||||
(tuple_expression
|
||||
")" @indent.branch @indent.end)
|
||||
|
||||
(value_arguments
|
||||
")" @indent.branch @indent.end)
|
||||
|
||||
(tuple_type
|
||||
")" @indent.branch @indent.end)
|
||||
|
||||
(modifiers
|
||||
(attribute
|
||||
")" @indent.branch @indent.end))
|
||||
|
||||
[
|
||||
"}"
|
||||
"]"
|
||||
] @indent.branch @indent.end
|
||||
|
||||
[
|
||||
; (ERROR)
|
||||
(comment)
|
||||
(multiline_comment)
|
||||
(raw_str_part)
|
||||
(multi_line_string_literal)
|
||||
] @indent.auto
|
||||
|
||||
(directive) @indent.ignore
|
||||
|
||||
10
unified/extractor/tree-sitter-swift/queries/injections.scm
Normal file
10
unified/extractor/tree-sitter-swift/queries/injections.scm
Normal file
@@ -0,0 +1,10 @@
|
||||
; Parse regex syntax within regex literals
|
||||
|
||||
((regex_literal) @injection.content
|
||||
(#set! injection.language "regex"))
|
||||
|
||||
([
|
||||
(comment)
|
||||
(multiline_comment)
|
||||
] @injection.content
|
||||
(#set! injection.language "comment"))
|
||||
23
unified/extractor/tree-sitter-swift/queries/locals.scm
Normal file
23
unified/extractor/tree-sitter-swift/queries/locals.scm
Normal file
@@ -0,0 +1,23 @@
|
||||
(import_declaration
|
||||
(identifier) @local.definition.import)
|
||||
|
||||
(function_declaration
|
||||
name: (simple_identifier) @local.definition.function)
|
||||
|
||||
; Scopes
|
||||
[
|
||||
(statements)
|
||||
(for_statement)
|
||||
(while_statement)
|
||||
(repeat_while_statement)
|
||||
(do_statement)
|
||||
(if_statement)
|
||||
(guard_statement)
|
||||
(switch_statement)
|
||||
(property_declaration)
|
||||
(function_declaration)
|
||||
(class_declaration)
|
||||
(protocol_declaration)
|
||||
] @local.scope
|
||||
|
||||
|
||||
66
unified/extractor/tree-sitter-swift/queries/outline.scm
Normal file
66
unified/extractor/tree-sitter-swift/queries/outline.scm
Normal file
@@ -0,0 +1,66 @@
|
||||
(protocol_declaration
|
||||
declaration_kind: "protocol" @name
|
||||
.
|
||||
_ * @name
|
||||
.
|
||||
body: (protocol_body)
|
||||
) @item
|
||||
|
||||
(class_declaration
|
||||
declaration_kind: (
|
||||
[
|
||||
"actor"
|
||||
"class"
|
||||
"extension"
|
||||
"enum"
|
||||
"struct"
|
||||
]
|
||||
) @name
|
||||
.
|
||||
_ * @name
|
||||
.
|
||||
body: (_)
|
||||
) @item
|
||||
|
||||
(init_declaration
|
||||
name: "init" @name
|
||||
.
|
||||
_ * @name
|
||||
.
|
||||
body: (function_body)
|
||||
) @item
|
||||
|
||||
(deinit_declaration
|
||||
"deinit" @name) @item
|
||||
|
||||
(function_declaration
|
||||
"func" @name
|
||||
.
|
||||
_ * @name
|
||||
.
|
||||
body: (function_body)
|
||||
) @item
|
||||
|
||||
(class_body
|
||||
(property_declaration
|
||||
(value_binding_pattern) @name
|
||||
name: (pattern) @name
|
||||
(type_annotation)? @name
|
||||
) @item
|
||||
)
|
||||
|
||||
(enum_class_body
|
||||
(property_declaration
|
||||
(value_binding_pattern) @name
|
||||
name: (pattern) @name
|
||||
(type_annotation)? @name
|
||||
) @item
|
||||
)
|
||||
|
||||
(
|
||||
(protocol_function_declaration) @name
|
||||
) @item
|
||||
|
||||
(
|
||||
(protocol_property_declaration) @name
|
||||
) @item
|
||||
51
unified/extractor/tree-sitter-swift/queries/tags.scm
Normal file
51
unified/extractor/tree-sitter-swift/queries/tags.scm
Normal file
@@ -0,0 +1,51 @@
|
||||
(class_declaration
|
||||
name: (type_identifier) @name) @definition.class
|
||||
|
||||
(protocol_declaration
|
||||
name: (type_identifier) @name) @definition.interface
|
||||
|
||||
(class_declaration
|
||||
(class_body
|
||||
[
|
||||
(function_declaration
|
||||
name: (simple_identifier) @name
|
||||
)
|
||||
(subscript_declaration
|
||||
(parameter (simple_identifier) @name)
|
||||
)
|
||||
(init_declaration "init" @name)
|
||||
(deinit_declaration "deinit" @name)
|
||||
]
|
||||
)
|
||||
) @definition.method
|
||||
|
||||
(protocol_declaration
|
||||
(protocol_body
|
||||
[
|
||||
(protocol_function_declaration
|
||||
name: (simple_identifier) @name
|
||||
)
|
||||
(subscript_declaration
|
||||
(parameter (simple_identifier) @name)
|
||||
)
|
||||
(init_declaration "init" @name)
|
||||
]
|
||||
)
|
||||
) @definition.method
|
||||
|
||||
(class_declaration
|
||||
(class_body
|
||||
[
|
||||
(property_declaration
|
||||
(pattern (simple_identifier) @name)
|
||||
)
|
||||
]
|
||||
)
|
||||
) @definition.property
|
||||
|
||||
(property_declaration
|
||||
(pattern (simple_identifier) @name)
|
||||
) @definition.property
|
||||
|
||||
(function_declaration
|
||||
name: (simple_identifier) @name) @definition.function
|
||||
19
unified/extractor/tree-sitter-swift/queries/textobjects.scm
Normal file
19
unified/extractor/tree-sitter-swift/queries/textobjects.scm
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
|
||||
; MARK: Structure
|
||||
|
||||
(function_declaration
|
||||
body: (_) @function.inside) @function.around
|
||||
|
||||
; TODO: Classes/structs/enums
|
||||
|
||||
|
||||
; MARK: Tests
|
||||
|
||||
; Only matches prefix test. Other conventions
|
||||
; might be nice to add!
|
||||
(function_declaration
|
||||
name: (simple_identifier) @_name
|
||||
(#match? @_name "^test")
|
||||
)
|
||||
|
||||
11386
unified/extractor/tree-sitter-swift/src/grammar.json
Normal file
11386
unified/extractor/tree-sitter-swift/src/grammar.json
Normal file
File diff suppressed because it is too large
Load Diff
30782
unified/extractor/tree-sitter-swift/src/node-types.json
Normal file
30782
unified/extractor/tree-sitter-swift/src/node-types.json
Normal file
File diff suppressed because it is too large
Load Diff
552722
unified/extractor/tree-sitter-swift/src/parser.c
Normal file
552722
unified/extractor/tree-sitter-swift/src/parser.c
Normal file
File diff suppressed because it is too large
Load Diff
929
unified/extractor/tree-sitter-swift/src/scanner.c
Normal file
929
unified/extractor/tree-sitter-swift/src/scanner.c
Normal file
@@ -0,0 +1,929 @@
|
||||
#include "tree_sitter/parser.h"

#include <stdlib.h>
#include <string.h>
#include <wctype.h>
|
||||
|
||||
// Total number of external tokens; must match the `externals` list in grammar.js.
#define TOKEN_COUNT 33

// External token kinds produced by this scanner, in grammar.js `externals` order.
enum TokenType {
    BLOCK_COMMENT,
    RAW_STR_PART,
    RAW_STR_CONTINUING_INDICATOR,
    RAW_STR_END_PART,
    IMPLICIT_SEMI,
    EXPLICIT_SEMI,
    ARROW_OPERATOR,
    DOT_OPERATOR,
    CONJUNCTION_OPERATOR,
    DISJUNCTION_OPERATOR,
    NIL_COALESCING_OPERATOR,
    EQUAL_SIGN,
    EQ_EQ,
    PLUS_THEN_WS,
    MINUS_THEN_WS,
    BANG,
    THROWS_KEYWORD,
    RETHROWS_KEYWORD,
    DEFAULT_KEYWORD,
    WHERE_KEYWORD,
    ELSE_KEYWORD,
    CATCH_KEYWORD,
    AS_KEYWORD,
    AS_QUEST,
    AS_BANG,
    ASYNC_KEYWORD,
    CUSTOM_OPERATOR,
    HASH_SYMBOL,
    DIRECTIVE_IF,
    DIRECTIVE_ELSEIF,
    DIRECTIVE_ELSE,
    DIRECTIVE_ENDIF,
    FAKE_TRY_BANG
};

// Number of fixed operators / keywords recognized by eat_operators.
#define OPERATOR_COUNT 20

// Literal spellings, parallel to OP_ILLEGAL_TERMINATORS and OP_SYMBOLS below.
const char* OPERATORS[OPERATOR_COUNT] = {
    "->",
    ".",
    "&&",
    "||",
    "??",
    "=",
    "==",
    "+",
    "-",
    "!",
    "throws",
    "rethrows",
    "default",
    "where",
    "else",
    "catch",
    "as",
    "as?",
    "as!",
    "async"
};

// Which class of lookahead character invalidates a just-completed operator match.
enum IllegalTerminatorGroup {
    ALPHANUMERIC,
    OPERATOR_SYMBOLS,
    OPERATOR_OR_DOT,
    NON_WHITESPACE
};

// Parallel to OPERATORS: the terminator class that disqualifies each match.
const enum IllegalTerminatorGroup OP_ILLEGAL_TERMINATORS[OPERATOR_COUNT] = {
    OPERATOR_SYMBOLS, // ->
    OPERATOR_OR_DOT,  // .
    OPERATOR_SYMBOLS, // &&
    OPERATOR_SYMBOLS, // ||
    OPERATOR_SYMBOLS, // ??
    OPERATOR_SYMBOLS, // =
    OPERATOR_SYMBOLS, // ==
    NON_WHITESPACE,   // +
    NON_WHITESPACE,   // -
    OPERATOR_SYMBOLS, // !
    ALPHANUMERIC,     // throws
    ALPHANUMERIC,     // rethrows
    ALPHANUMERIC,     // default
    ALPHANUMERIC,     // where
    ALPHANUMERIC,     // else
    ALPHANUMERIC,     // catch
    ALPHANUMERIC,     // as
    OPERATOR_SYMBOLS, // as?
    OPERATOR_SYMBOLS, // as!
    ALPHANUMERIC      // async
};

// Parallel to OPERATORS: the external token emitted for each spelling.
const enum TokenType OP_SYMBOLS[OPERATOR_COUNT] = {
    ARROW_OPERATOR,
    DOT_OPERATOR,
    CONJUNCTION_OPERATOR,
    DISJUNCTION_OPERATOR,
    NIL_COALESCING_OPERATOR,
    EQUAL_SIGN,
    EQ_EQ,
    PLUS_THEN_WS,
    MINUS_THEN_WS,
    BANG,
    THROWS_KEYWORD,
    RETHROWS_KEYWORD,
    DEFAULT_KEYWORD,
    WHERE_KEYWORD,
    ELSE_KEYWORD,
    CATCH_KEYWORD,
    AS_KEYWORD,
    AS_QUEST,
    AS_BANG,
    ASYNC_KEYWORD
};

// Parallel to OPERATORS: a bitmask of token kinds that, when valid at the
// current position, suppress emission of the matched operator (e.g. a valid
// FAKE_TRY_BANG suppresses BANG so `try!` lexes as a single unit).
// NOTE: shifts must be done in a 64-bit type. `1UL << FAKE_TRY_BANG` would be
// undefined behavior on LLP64 targets (Windows), where `unsigned long` is
// 32 bits and FAKE_TRY_BANG == 32; use 1ULL to match the uint64_t element type.
const uint64_t OP_SYMBOL_SUPPRESSOR[OPERATOR_COUNT] = {
    0,                     // ARROW_OPERATOR,
    0,                     // DOT_OPERATOR,
    0,                     // CONJUNCTION_OPERATOR,
    0,                     // DISJUNCTION_OPERATOR,
    0,                     // NIL_COALESCING_OPERATOR,
    0,                     // EQUAL_SIGN,
    0,                     // EQ_EQ,
    0,                     // PLUS_THEN_WS,
    0,                     // MINUS_THEN_WS,
    1ULL << FAKE_TRY_BANG, // BANG,
    0,                     // THROWS_KEYWORD,
    0,                     // RETHROWS_KEYWORD,
    0,                     // DEFAULT_KEYWORD,
    0,                     // WHERE_KEYWORD,
    0,                     // ELSE_KEYWORD,
    0,                     // CATCH_KEYWORD,
    0,                     // AS_KEYWORD,
    0,                     // AS_QUEST,
    0,                     // AS_BANG,
    0,                     // ASYNC_KEYWORD
};

// Operator spellings that Swift reserves; a custom operator may not be exactly
// one of these (see any_reserved_ops / eat_operators).
#define RESERVED_OP_COUNT 31

const char* RESERVED_OPS[RESERVED_OP_COUNT] = {
    "/",
    "=",
    "-",
    "+",
    "!",
    "*",
    "%",
    "<",
    ">",
    "&",
    "|",
    "^",
    "?",
    "~",
    ".",
    "..",
    "->",
    "/*",
    "*/",
    "+=",
    "-=",
    "*=",
    "/=",
    "%=",
    ">>",
    "<<",
    "++",
    "--",
    "===",
    "...",
    "..<"
};
|
||||
|
||||
static bool is_cross_semi_token(enum TokenType op) {
|
||||
switch(op) {
|
||||
case ARROW_OPERATOR:
|
||||
case DOT_OPERATOR:
|
||||
case CONJUNCTION_OPERATOR:
|
||||
case DISJUNCTION_OPERATOR:
|
||||
case NIL_COALESCING_OPERATOR:
|
||||
case EQUAL_SIGN:
|
||||
case EQ_EQ:
|
||||
case PLUS_THEN_WS:
|
||||
case MINUS_THEN_WS:
|
||||
case THROWS_KEYWORD:
|
||||
case RETHROWS_KEYWORD:
|
||||
case DEFAULT_KEYWORD:
|
||||
case WHERE_KEYWORD:
|
||||
case ELSE_KEYWORD:
|
||||
case CATCH_KEYWORD:
|
||||
case AS_KEYWORD:
|
||||
case AS_QUEST:
|
||||
case AS_BANG:
|
||||
case ASYNC_KEYWORD:
|
||||
case CUSTOM_OPERATOR:
|
||||
return true;
|
||||
case BANG:
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Characters that suppress an implicit semicolon when they start the next line,
// but that the scanner must NOT consume (the grammar lexes them itself).
#define NON_CONSUMING_CROSS_SEMI_CHAR_COUNT 3
const uint32_t NON_CONSUMING_CROSS_SEMI_CHARS[NON_CONSUMING_CROSS_SEMI_CHAR_COUNT] = { '?', ':', '{' };

/**
 * All possible results of having performed some sort of parsing.
 *
 * A parser can return a result along two dimensions:
 * 1. Should the scanner continue trying to find another result?
 * 2. Was some result produced by this parsing attempt?
 *
 * These are flattened into a single enum together. When the function returns one of the `TOKEN_FOUND` cases, it
 * will always populate its `symbol_result` field. When it returns one of the `STOP_PARSING` cases, callers should
 * immediately return (with the value, if there is one).
 */
enum ParseDirective {
    CONTINUE_PARSING_NOTHING_FOUND,
    CONTINUE_PARSING_TOKEN_FOUND,
    CONTINUE_PARSING_SLASH_CONSUMED,
    STOP_PARSING_NOTHING_FOUND,
    STOP_PARSING_TOKEN_FOUND,
    STOP_PARSING_END_OF_FILE
};

// Persistent scanner state, (de)serialized by tree-sitter across parse runs.
struct ScannerState {
    // Number of `#` marks opening the raw string currently being scanned;
    // 0 when not inside a raw string.
    uint32_t ongoing_raw_str_hash_count;
};
|
||||
|
||||
// Allocate a zero-initialized scanner state (tree-sitter lifecycle hook).
void *tree_sitter_swift_external_scanner_create() {
    return calloc(1, sizeof(struct ScannerState));
}

// Release the scanner state (tree-sitter lifecycle hook).
void tree_sitter_swift_external_scanner_destroy(void *payload) {
    free(payload);
}

// Reset scanner state to its initial value (no raw string in progress).
void tree_sitter_swift_external_scanner_reset(void *payload) {
    struct ScannerState *state = (struct ScannerState *)payload;
    state->ongoing_raw_str_hash_count = 0;
}
|
||||
|
||||
unsigned tree_sitter_swift_external_scanner_serialize(void *payload, char *buffer) {
|
||||
struct ScannerState *state = (struct ScannerState *)payload;
|
||||
uint32_t hash_count = state->ongoing_raw_str_hash_count;
|
||||
buffer[0] = (hash_count >> 24) & 0xff;
|
||||
buffer[1] = (hash_count >> 16) & 0xff;
|
||||
buffer[2] = (hash_count >> 8) & 0xff;
|
||||
buffer[3] = (hash_count) & 0xff;
|
||||
return 4;
|
||||
}
|
||||
|
||||
void tree_sitter_swift_external_scanner_deserialize(
|
||||
void *payload,
|
||||
const char *buffer,
|
||||
unsigned length
|
||||
) {
|
||||
if (length < 4) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t hash_count = (
|
||||
(((uint32_t) buffer[0]) << 24) |
|
||||
(((uint32_t) buffer[1]) << 16) |
|
||||
(((uint32_t) buffer[2]) << 8) |
|
||||
(((uint32_t) buffer[3]))
|
||||
);
|
||||
struct ScannerState *state = (struct ScannerState *)payload;
|
||||
state->ongoing_raw_str_hash_count = hash_count;
|
||||
}
|
||||
|
||||
/* Consume one character, keeping it as part of the current token. */
static void advance(TSLexer *lexer) {
    lexer->advance(lexer, false);
}

/*
 * Whitespace test used by the semicolon logic: a `;` is handled alongside
 * whitespace so that explicit and implicit semicolons share one code path.
 */
static bool should_treat_as_wspace(int32_t character) {
    if (character == (int32_t) ';') {
        return true;
    }
    return iswspace(character) != 0;
}
|
||||
|
||||
static int32_t encountered_op_count(bool *encountered_operator) {
|
||||
int32_t encountered = 0;
|
||||
for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
|
||||
if (encountered_operator[op_idx]) {
|
||||
encountered++;
|
||||
}
|
||||
}
|
||||
|
||||
return encountered;
|
||||
}
|
||||
|
||||
static bool any_reserved_ops(uint8_t *encountered_reserved_ops) {
|
||||
for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
|
||||
if (encountered_reserved_ops[op_idx] == 2) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Decide whether `cur_char` may appear at position `char_idx` of a custom
 * operator whose first character was `first_char`, following the "Operators"
 * grammar in the Swift Language Reference (operator-head vs. operator-character).
 * The Unicode ranges below mirror that grammar: the first group is legal
 * anywhere, the second (combining marks) only after the first character.
 */
static bool is_legal_custom_operator(
    int32_t char_idx,
    int32_t first_char,
    int32_t cur_char
) {
    bool is_first_char = !char_idx;
    switch (cur_char) {
        // ASCII operator characters, legal at any position.
        case '=':
        case '-':
        case '+':
        case '!':
        case '%':
        case '<':
        case '>':
        case '&':
        case '|':
        case '^':
        case '?':
        case '~':
            return true;
        case '.':
            // Grammar allows `.` for any operator that starts with `.`
            return is_first_char || first_char == '.';
        case '*':
        case '/':
            // Not listed in the grammar, but `/*` and `//` can't be the start of an operator since they start comments
            return char_idx != 1 || first_char != '/';
        default:
            // Unicode operator-head ranges: legal at any position.
            if (
                (cur_char >= 0x00A1 && cur_char <= 0x00A7) ||
                (cur_char == 0x00A9) ||
                (cur_char == 0x00AB) ||
                (cur_char == 0x00AC) ||
                (cur_char == 0x00AE) ||
                (cur_char >= 0x00B0 && cur_char <= 0x00B1) ||
                (cur_char == 0x00B6) ||
                (cur_char == 0x00BB) ||
                (cur_char == 0x00BF) ||
                (cur_char == 0x00D7) ||
                (cur_char == 0x00F7) ||
                (cur_char >= 0x2016 && cur_char <= 0x2017) ||
                (cur_char >= 0x2020 && cur_char <= 0x2027) ||
                (cur_char >= 0x2030 && cur_char <= 0x203E) ||
                (cur_char >= 0x2041 && cur_char <= 0x2053) ||
                (cur_char >= 0x2055 && cur_char <= 0x205E) ||
                (cur_char >= 0x2190 && cur_char <= 0x23FF) ||
                (cur_char >= 0x2500 && cur_char <= 0x2775) ||
                (cur_char >= 0x2794 && cur_char <= 0x2BFF) ||
                (cur_char >= 0x2E00 && cur_char <= 0x2E7F) ||
                (cur_char >= 0x3001 && cur_char <= 0x3003) ||
                (cur_char >= 0x3008 && cur_char <= 0x3020) ||
                (cur_char == 0x3030)
            ) {
                return true;
            } else if (
                // Combining-mark ranges: legal only after the first character.
                (cur_char >= 0x0300 && cur_char <= 0x036f) ||
                (cur_char >= 0x1DC0 && cur_char <= 0x1DFF) ||
                (cur_char >= 0x20D0 && cur_char <= 0x20FF) ||
                (cur_char >= 0xFE00 && cur_char <= 0xFE0F) ||
                (cur_char >= 0xFE20 && cur_char <= 0xFE2F) ||
                (cur_char >= 0xE0100 && cur_char <= 0xE01EF)
            ) {
                return !is_first_char;
            } else {
                return false;
            }
    }
}
|
||||
|
||||
/*
 * Scan forward for the longest fixed operator/keyword (OPERATORS) or custom
 * operator at the current position.
 *
 * Parameters:
 *   lexer         - tree-sitter lexer; advanced as characters are examined.
 *   valid_symbols - tree-sitter's per-token validity array for this position.
 *   mark_end      - whether to mark token ends as matches are found (callers
 *                   pass false for exploratory passes whose result is discarded).
 *   prior_char    - a character already consumed by the caller ('\0' if none);
 *                   when set, matching starts at string index 1.
 *   symbol_result - out: the matched token kind, set only when returning true.
 *
 * Returns true if a fixed or custom operator token was recognized. A fixed
 * match can still be suppressed (return false) when one of its suppressor
 * tokens is valid here (see OP_SYMBOL_SUPPRESSOR, used for `try!`). A custom
 * operator is rejected when its exact spelling is one of RESERVED_OPS.
 */
static bool eat_operators(
    TSLexer *lexer,
    const bool *valid_symbols,
    bool mark_end,
    const int32_t prior_char,
    enum TokenType *symbol_result
) {
    // Candidacy arrays: which fixed / reserved operators still match the
    // characters seen so far. Reserved entries use 0 = dead, 1 = live prefix,
    // 2 = exact complete match.
    bool possible_operators[OPERATOR_COUNT];
    uint8_t reserved_operators[RESERVED_OP_COUNT];
    for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
        possible_operators[op_idx] = valid_symbols[OP_SYMBOLS[op_idx]] && (!prior_char || OPERATORS[op_idx][0] == prior_char);
    }
    for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
        reserved_operators[op_idx] = !prior_char || RESERVED_OPS[op_idx][0] == prior_char;
    }

    bool possible_custom_operator = valid_symbols[CUSTOM_OPERATOR];
    int32_t first_char = prior_char ? prior_char : lexer->lookahead;
    int32_t last_examined_char = first_char;

    int32_t str_idx = prior_char ? 1 : 0;
    int32_t full_match = -1;
    while(true) {
        // Advance each still-possible fixed operator by one character.
        for (int op_idx = 0; op_idx < OPERATOR_COUNT; op_idx++) {
            if (!possible_operators[op_idx]) {
                continue;
            }

            if (OPERATORS[op_idx][str_idx] == '\0') {
                // Make sure that the operator is allowed to have the next character as its lookahead.
                enum IllegalTerminatorGroup illegal_terminators = OP_ILLEGAL_TERMINATORS[op_idx];
                switch (lexer->lookahead) {
                    // See "Operators":
                    // https://docs.swift.org/swift-book/ReferenceManual/LexicalStructure.html#ID418
                    case '/':
                    case '=':
                    case '-':
                    case '+':
                    case '!':
                    case '*':
                    case '%':
                    case '<':
                    case '>':
                    case '&':
                    case '|':
                    case '^':
                    case '?':
                    case '~':
                        if (illegal_terminators == OPERATOR_SYMBOLS) {
                            break;
                        } // Otherwise, intentionally fall through to the OPERATOR_OR_DOT case
                        // fall through
                    case '.':
                        if (illegal_terminators == OPERATOR_OR_DOT) {
                            break;
                        } // Otherwise, fall through to DEFAULT which checks its groups directly
                        // fall through
                    default:
                        if (iswalnum(lexer->lookahead) && illegal_terminators == ALPHANUMERIC) {
                            break;
                        }

                        if (!iswspace(lexer->lookahead) && illegal_terminators == NON_WHITESPACE) {
                            break;
                        }

                        // Complete fixed match with a legal terminator; later
                        // (longer) matches can still overwrite full_match.
                        full_match = op_idx;
                        if (mark_end) {
                            lexer->mark_end(lexer);
                        }
                }

                possible_operators[op_idx] = false;
                continue;
            }

            if (OPERATORS[op_idx][str_idx] != lexer->lookahead) {
                possible_operators[op_idx] = false;
                continue;
            }
        }

        // Track reserved operators in parallel so a custom operator that
        // spells exactly a reserved one can be rejected at the end.
        for (int op_idx = 0; op_idx < RESERVED_OP_COUNT; op_idx++) {
            if (!reserved_operators[op_idx]) {
                continue;
            }

            if (RESERVED_OPS[op_idx][str_idx] == '\0') {
                reserved_operators[op_idx] = 0;
                continue;
            }

            if (RESERVED_OPS[op_idx][str_idx] != lexer->lookahead) {
                reserved_operators[op_idx] = 0;
                continue;
            }

            if (RESERVED_OPS[op_idx][str_idx + 1] == '\0') {
                reserved_operators[op_idx] = 2;
                continue;
            }
        }

        possible_custom_operator = possible_custom_operator && is_legal_custom_operator(
            str_idx,
            first_char,
            lexer->lookahead
        );

        uint32_t encountered_ops = encountered_op_count(possible_operators);
        if (encountered_ops == 0) {
            if (!possible_custom_operator) {
                break;
            } else if (mark_end && full_match == -1) {
                // Only a custom operator remains live: keep extending its end.
                lexer->mark_end(lexer);
            }
        }

        last_examined_char = lexer->lookahead;
        lexer->advance(lexer, false);
        str_idx += 1;

        // Stop once neither a fixed nor a legal custom operator can continue.
        if (encountered_ops == 0 && !is_legal_custom_operator(
            str_idx,
            first_char,
            lexer->lookahead
        )) {
            break;
        }
    }

    if (full_match != -1) {
        // We have a match -- first see if that match has a symbol that suppresses it. For example, in `try!`, we do not
        // want to emit the `!` as a symbol in our scanner, because we want the parser to have the chance to parse it as
        // an immediate token.
        uint64_t suppressing_symbols = OP_SYMBOL_SUPPRESSOR[full_match];
        if (suppressing_symbols) {
            for (uint64_t suppressor = 0; suppressor < TOKEN_COUNT; suppressor++) {
                if (!(suppressing_symbols & 1ULL << suppressor)) {
                    continue;
                }

                // The suppressing symbol is valid in this position, so skip it.
                if (valid_symbols[suppressor]) {
                    return false;
                }
            }
        }
        *symbol_result = OP_SYMBOLS[full_match];
        return true;
    }

    if (possible_custom_operator && !any_reserved_ops(reserved_operators)) {
        // A trailing `<` may belong to a generic-argument list rather than the
        // operator, so only mark the end past it when whitespace follows.
        if ((last_examined_char != '<' || iswspace(lexer->lookahead)) && mark_end) {
            lexer->mark_end(lexer);
        }
        *symbol_result = CUSTOM_OPERATOR;
        return true;
    }

    return false;
}
|
||||
|
||||
/*
 * Try to consume a (possibly nested) `/* ... *​/` block comment.
 *
 * Returns:
 *   CONTINUE_PARSING_NOTHING_FOUND  - lookahead was not `/`; nothing consumed.
 *   CONTINUE_PARSING_SLASH_CONSUMED - a lone `/` was consumed but no `*`
 *                                     followed; the caller must account for it.
 *   STOP_PARSING_TOKEN_FOUND        - full comment consumed; *symbol_result is
 *                                     set to BLOCK_COMMENT (end marked only
 *                                     when mark_end is true).
 *   STOP_PARSING_END_OF_FILE        - EOF inside an unterminated comment.
 */
static enum ParseDirective eat_comment(
    TSLexer *lexer,
    const bool *valid_symbols,
    bool mark_end,
    enum TokenType *symbol_result
) {
    if (lexer->lookahead != '/') {
        return CONTINUE_PARSING_NOTHING_FOUND;
    }

    advance(lexer);

    if (lexer->lookahead != '*') {
        return CONTINUE_PARSING_SLASH_CONSUMED;
    }

    advance(lexer);

    // Swift block comments nest; track depth and whether the previous
    // character was `*` (so `*/` can be recognized).
    bool after_star = false;
    unsigned nesting_depth = 1;
    for (;;) {
        switch (lexer->lookahead) {
            case '\0':
                return STOP_PARSING_END_OF_FILE;
            case '*':
                advance(lexer);
                after_star = true;
                break;
            case '/':
                if (after_star) {
                    // `*/` closes one nesting level.
                    advance(lexer);
                    after_star = false;
                    nesting_depth--;
                    if (nesting_depth == 0) {
                        if (mark_end) {
                            lexer->mark_end(lexer);
                        }
                        *symbol_result = BLOCK_COMMENT;
                        return STOP_PARSING_TOKEN_FOUND;
                    }
                } else {
                    // A `/` not preceded by `*`: check for `/*` opening a
                    // nested comment.
                    advance(lexer);
                    after_star = false;
                    if (lexer->lookahead == '*') {
                        nesting_depth++;
                        advance(lexer);
                    }
                }
                break;
            default:
                advance(lexer);
                after_star = false;
                break;
        }
    }
}
|
||||
|
||||
/*
 * Skip leading whitespace and decide whether an implicit or explicit
 * semicolon token should be emitted at this position.
 *
 * Returns:
 *   STOP_PARSING_TOKEN_FOUND        - *symbol_result holds IMPLICIT_SEMI,
 *                                     EXPLICIT_SEMI, or (when a block comment
 *                                     was found mid-whitespace) BLOCK_COMMENT.
 *   CONTINUE_PARSING_TOKEN_FOUND    - a newline was crossed but emission is
 *                                     deferred; the caller checks operators.
 *   CONTINUE_PARSING_SLASH_CONSUMED - a stray `/` was consumed while probing
 *                                     for comments.
 *   STOP_PARSING_NOTHING_FOUND / STOP_PARSING_END_OF_FILE - caller bails out.
 *   CONTINUE_PARSING_NOTHING_FOUND  - no semicolon-relevant whitespace seen,
 *                                     or the next char suppresses the semi.
 */
static enum ParseDirective eat_whitespace(
    TSLexer *lexer,
    const bool *valid_symbols,
    enum TokenType *symbol_result
) {
    enum ParseDirective ws_directive = CONTINUE_PARSING_NOTHING_FOUND;
    bool semi_is_valid = valid_symbols[IMPLICIT_SEMI] && valid_symbols[EXPLICIT_SEMI];
    uint32_t lookahead;
    // Skip whitespace (and `;`, which should_treat_as_wspace folds in).
    // `lookahead` retains the first non-whitespace character afterwards.
    while (should_treat_as_wspace(lookahead = lexer->lookahead)) {
        if (lookahead == ';') {
            if (semi_is_valid) {
                ws_directive = STOP_PARSING_TOKEN_FOUND;
                lexer->advance(lexer, false);
            }

            break;
        }

        lexer->advance(lexer, true);

        lexer->mark_end(lexer);

        // Crossing a newline makes an implicit semicolon a candidate.
        if (ws_directive == CONTINUE_PARSING_NOTHING_FOUND && (lookahead == '\n' || lookahead == '\r')) {
            ws_directive = CONTINUE_PARSING_TOKEN_FOUND;
        }
    }

    enum ParseDirective any_comment = CONTINUE_PARSING_NOTHING_FOUND;
    if (ws_directive == CONTINUE_PARSING_TOKEN_FOUND && lookahead == '/') {
        bool has_seen_single_comment = false;
        while (lexer->lookahead == '/') {
            // It's possible that this is a comment - start an exploratory mission to find out, and if it is, look for what
            // comes after it. We care about what comes after it for the purpose of suppressing the newline.

            enum TokenType multiline_comment_result;
            any_comment = eat_comment(lexer, valid_symbols, /* mark_end */ false, &multiline_comment_result);
            if (any_comment == STOP_PARSING_TOKEN_FOUND) {
                // This is a multiline comment. This scanner should be parsing those, so we might want to bail out and
                // emit it instead. However, we only want to do that if we haven't advanced through a _single_ line
                // comment on the way - otherwise that will get lumped into this.
                if (!has_seen_single_comment) {
                    lexer->mark_end(lexer);
                    *symbol_result = multiline_comment_result;
                    return STOP_PARSING_TOKEN_FOUND;
                }
            } else if (any_comment == STOP_PARSING_END_OF_FILE) {
                return STOP_PARSING_END_OF_FILE;
            } else if (any_comment == CONTINUE_PARSING_SLASH_CONSUMED) {
                // We accidentally ate a slash -- we should actually bail out, say we saw nothing, and let the next pass
                // take it from after the newline.
                return CONTINUE_PARSING_SLASH_CONSUMED;
            } else if (lexer->lookahead == '/') {
                // There wasn't a multiline comment, which we know means that the comment parser ate its `/` and then
                // bailed out. If it had seen anything comment-like after that first `/` it would have continued going
                // and eventually had a well-formed comment or an EOF. Thus, if we're currently looking at a `/`, it's
                // the second one of those and it means we have a single-line comment.
                has_seen_single_comment = true;
                while (lexer->lookahead != '\n' && lexer->lookahead != '\0') {
                    lexer->advance(lexer, true);
                }
            } else if (iswspace(lexer->lookahead)) {
                // We didn't see any type of comment - in fact, we saw an operator that we don't normally treat as an
                // operator. Still, this is a reason to stop parsing.
                return STOP_PARSING_NOTHING_FOUND;
            }

            // If we skipped through some comment, we're at whitespace now, so advance.
            while(iswspace(lexer->lookahead)) {
                any_comment = CONTINUE_PARSING_NOTHING_FOUND; // We're advancing, so clear out the comment
                lexer->advance(lexer, true);
            }
        }

        // After the comments, probe for an operator that would bind across the
        // newline (exploratory: mark_end false, result discarded).
        enum TokenType operator_result;
        bool saw_operator = eat_operators(
            lexer,
            valid_symbols,
            /* mark_end */ false,
            '\0',
            &operator_result
        );
        if (saw_operator) {
            // The operator we saw should suppress the newline, so bail out.
            return STOP_PARSING_NOTHING_FOUND;
        } else {
            // Promote the implicit newline to an explicit one so we don't check for operators again.
            *symbol_result = IMPLICIT_SEMI;
            ws_directive = STOP_PARSING_TOKEN_FOUND;
        }
    }

    // Let's consume operators that can live after a "semicolon" style newline. Before we do that, though, we want to
    // check for a set of characters that we do not consume, but that still suppress the semi.
    if (ws_directive == CONTINUE_PARSING_TOKEN_FOUND) {
        for (int i = 0; i < NON_CONSUMING_CROSS_SEMI_CHAR_COUNT; i++) {
            if (NON_CONSUMING_CROSS_SEMI_CHARS[i] == lookahead) {
                return CONTINUE_PARSING_NOTHING_FOUND;
            }
        }
    }

    if (semi_is_valid && ws_directive != CONTINUE_PARSING_NOTHING_FOUND) {
        *symbol_result = lookahead == ';' ? EXPLICIT_SEMI : IMPLICIT_SEMI;
        return ws_directive;
    }

    return CONTINUE_PARSING_NOTHING_FOUND;
}
|
||||
|
||||
#define DIRECTIVE_COUNT 4
|
||||
const char* DIRECTIVES[OPERATOR_COUNT] = {
|
||||
"if",
|
||||
"elseif",
|
||||
"else",
|
||||
"endif"
|
||||
};
|
||||
|
||||
const enum TokenType DIRECTIVE_SYMBOLS[DIRECTIVE_COUNT] = {
|
||||
DIRECTIVE_IF,
|
||||
DIRECTIVE_ELSEIF,
|
||||
DIRECTIVE_ELSE,
|
||||
DIRECTIVE_ENDIF
|
||||
};
|
||||
|
||||
/*
 * Called after a single `#` has been consumed: match the longest compiler
 * directive keyword (if / elseif / else / endif) at the current position.
 * Returns the corresponding DIRECTIVE_* token, or HASH_SYMBOL when no
 * directive matches (the `#` alone is the token; mark_end already covers it).
 */
static enum TokenType find_possible_compiler_directive(TSLexer *lexer) {
    // Candidacy flags: which directives still match the characters seen so far.
    bool possible_directives[DIRECTIVE_COUNT];
    for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx++) {
        possible_directives[dir_idx] = true;
    }

    int32_t str_idx = 0;
    int32_t full_match = -1;
    while(true) {
        for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx++) {
            if (!possible_directives[dir_idx]) {
                continue;
            }

            uint8_t expected_char = DIRECTIVES[dir_idx][str_idx];
            if (expected_char == '\0') {
                // Complete match; longer directives may still overwrite it
                // (e.g. "else" then "elseif").
                full_match = dir_idx;
                lexer->mark_end(lexer);
            }

            if (expected_char != lexer->lookahead) {
                possible_directives[dir_idx] = false;
                continue;
            }
        }

        uint8_t match_count = 0;
        for (int dir_idx = 0; dir_idx < DIRECTIVE_COUNT; dir_idx += 1) {
            if (possible_directives[dir_idx]) {
                match_count += 1;
            }
        }

        if (match_count == 0) {
            break;
        }

        lexer->advance(lexer, false);
        str_idx += 1;
    }

    if (full_match == -1) {
        // No compiler directive found, so just match the starting symbol
        return HASH_SYMBOL;
    }

    return DIRECTIVE_SYMBOLS[full_match];
}
|
||||
|
||||
/*
 * Scan pieces of a Swift raw string literal (`#"..."#`, `##"..."##`, ...).
 *
 * Handles three entry situations:
 *   - No raw string in progress (hash count 0): count leading `#`s. A `#"`
 *     opens a raw string; a single `#` not followed by `"` is instead handed
 *     to find_possible_compiler_directive (so `#if` etc. are produced here).
 *   - RAW_STR_CONTINUING_INDICATOR valid: the grammar just closed an
 *     interpolation `)`, resume scanning the same raw string.
 *   - Otherwise: not our token, return false.
 *
 * On success sets *symbol_result to RAW_STR_PART (stopped before a `\#...(`
 * interpolation; hash count saved in state), RAW_STR_END_PART (closing
 * `"#...#` consumed; state cleared), or a directive/HASH_SYMBOL token.
 */
static bool eat_raw_str_part(
    struct ScannerState *state,
    TSLexer *lexer,
    const bool *valid_symbols,
    enum TokenType *symbol_result
) {
    uint32_t hash_count = state->ongoing_raw_str_hash_count;
    if (!valid_symbols[RAW_STR_PART]) {
        return false;
    } else if (hash_count == 0) {
        // If this is a raw_str_part, it's the first one - look for hashes
        while (lexer->lookahead == '#') {
            hash_count += 1;
            advance(lexer);
        }

        if (hash_count == 0) {
            return false;
        }

        if (lexer->lookahead == '"') {
            advance(lexer);
        } else if (hash_count == 1) {
            // A lone `#` not opening a string: maybe a compiler directive.
            lexer->mark_end(lexer);
            *symbol_result = find_possible_compiler_directive(lexer);
            return true;
        } else {
            return false;
        }

    } else if (valid_symbols[RAW_STR_CONTINUING_INDICATOR]) {
        // This is the end of an interpolation - now it's another raw_str_part. This is a synthetic
        // marker to tell us that the grammar just consumed a `(` symbol to close a raw
        // interpolation (since we don't want to fire on every `(` in existence). We don't have
        // anything to do except continue.
    } else {
        return false;
    }

    // We're in a state where anything other than `hash_count` hash symbols in a row should be eaten
    // and is part of a string.
    // The last character _before_ the hashes will tell us what happens next.
    // Matters are also complicated by the fact that we don't want to consume every character we
    // visit; if we see a `\#(`, for instance, with the appropriate number of hash symbols, we want
    // to end our parsing _before_ that sequence. This allows highlighting tools to treat that as a
    // separate token.
    while (lexer->lookahead != '\0') {
        uint8_t last_char = '\0';
        lexer->mark_end(lexer); // We always want to parse thru the start of the string so far
        // Advance through anything that isn't a hash symbol, because we want to count those.
        while (lexer->lookahead != '#' && lexer->lookahead != '\0') {
            last_char = lexer->lookahead;
            advance(lexer);
            if (last_char != '\\' || lexer->lookahead == '\\') {
                // Mark a new end, but only if we didn't just advance past a `\` symbol, since we
                // don't want to consume that. Exception: if this is a `\` that happens _right
                // after_ another `\`, we for some reason _do_ want to consume that, because
                // apparently that is parsed as a literal `\` followed by something escaped.
                lexer->mark_end(lexer);
            }
        }

        // We hit at least one hash - count them and see if they match.
        uint32_t current_hash_count = 0;
        while (lexer->lookahead == '#' && current_hash_count < hash_count) {
            current_hash_count += 1;
            advance(lexer);
        }

        // If we saw exactly the right number of hashes, one of three things is true:
        // 1. We're trying to interpolate into this string.
        // 2. The string just ended.
        // 3. This was just some hash characters doing nothing important.
        if (current_hash_count == hash_count) {
            if (last_char == '\\' && lexer->lookahead == '(') {
                // Interpolation case! Don't consume those chars; they get saved for grammar.js.
                *symbol_result = RAW_STR_PART;
                state->ongoing_raw_str_hash_count = hash_count;
                return true;
            } else if (last_char == '"') {
                // The string is finished! Mark the end here, on the very last hash symbol.
                lexer->mark_end(lexer);
                *symbol_result = RAW_STR_END_PART;
                state->ongoing_raw_str_hash_count = 0;
                return true;
            }
            // Nothing special happened - let the string continue.
        }
    }

    return false;
}
|
||||
|
||||
/*
 * Main external-scanner entry point (tree-sitter lifecycle hook).
 *
 * Pipeline, in order:
 *   1. eat_whitespace - may directly emit an (im/ex)plicit semi or a block
 *      comment, or report that a newline was crossed (has_ws_result).
 *   2. eat_comment - block comments when whitespace didn't already find one.
 *   3. eat_operators - fixed/custom operators; after a newline, only tokens
 *      that may bind across an implicit semi (is_cross_semi_token) win,
 *      otherwise the pending semi is emitted.
 *   4. eat_raw_str_part - raw string pieces and `#` compiler directives; kept
 *      last because it consumes `#` characters even on failure.
 *
 * Returns true with lexer->result_symbol set when a token was produced.
 */
bool tree_sitter_swift_external_scanner_scan(
    void *payload,
    TSLexer *lexer,
    const bool *valid_symbols
) {
    // Figure out our scanner state
    struct ScannerState *state = (struct ScannerState *)payload;

    // Consume any whitespace at the start.
    enum TokenType ws_result;
    enum ParseDirective ws_directive = eat_whitespace(lexer, valid_symbols, &ws_result);
    if (ws_directive == STOP_PARSING_TOKEN_FOUND) {
        lexer->result_symbol = ws_result;
        return true;
    }

    if (ws_directive == STOP_PARSING_NOTHING_FOUND || ws_directive == STOP_PARSING_END_OF_FILE) {
        return false;
    }

    // A newline was crossed; a semi may still be emitted below.
    bool has_ws_result = (ws_directive == CONTINUE_PARSING_TOKEN_FOUND);

    // Now consume comments (before custom operators so that those aren't treated as comments)
    enum TokenType comment_result;
    enum ParseDirective comment = ws_directive == CONTINUE_PARSING_SLASH_CONSUMED ? ws_directive : eat_comment(lexer, valid_symbols, /* mark_end */ true, &comment_result);
    if (comment == STOP_PARSING_TOKEN_FOUND) {
        lexer->mark_end(lexer);
        lexer->result_symbol = comment_result;
        return true;
    }

    if (comment == STOP_PARSING_END_OF_FILE) {
        return false;
    }
    // Now consume any operators that might cause our whitespace to be suppressed.
    enum TokenType operator_result;
    bool saw_operator = eat_operators(
        lexer,
        valid_symbols,
        /* mark_end */ !has_ws_result,
        // If a stray `/` was already consumed while probing for a comment,
        // resume operator matching from it.
        comment == CONTINUE_PARSING_SLASH_CONSUMED ? '/' : '\0',
        &operator_result
    );

    if (saw_operator && (!has_ws_result || is_cross_semi_token(operator_result))) {
        lexer->result_symbol = operator_result;
        if (has_ws_result) lexer->mark_end(lexer);
        return true;
    }

    if (has_ws_result) {
        // Don't `mark_end`, since we may have advanced through some operators.
        lexer->result_symbol = ws_result;
        return true;
    }

    // NOTE: this will consume any `#` characters it sees, even if it does not find a result. Keep
    // it at the end so that it doesn't interfere with special literals or selectors!
    enum TokenType raw_str_result;
    bool saw_raw_str_part = eat_raw_str_part(state, lexer, valid_symbols, &raw_str_result);
    if (saw_raw_str_part) {
        lexer->result_symbol = raw_str_result;
        return true;
    }

    return false;
}
|
||||
|
||||
54
unified/extractor/tree-sitter-swift/src/tree_sitter/alloc.h
Normal file
54
unified/extractor/tree-sitter-swift/src/tree_sitter/alloc.h
Normal file
@@ -0,0 +1,54 @@
|
||||
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Allow clients to override allocation functions.
// When TREE_SITTER_REUSE_ALLOCATOR is defined, the ts_* macros route every
// allocation through function pointers supplied by the embedding host, so
// this generated parser and its host share a single allocator.
#ifdef TREE_SITTER_REUSE_ALLOCATOR

// Provided by the host; assumed to be set before any ts_* call — TODO confirm
// against the embedding runtime.
extern void *(*ts_current_malloc)(size_t);
extern void *(*ts_current_calloc)(size_t, size_t);
extern void *(*ts_current_realloc)(void *, size_t);
extern void (*ts_current_free)(void *);

// Each macro is guarded individually so a client may pre-define any subset.
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif

#else

// Default: fall back to the C standard library allocator.
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif

#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ALLOC_H_
|
||||
290
unified/extractor/tree-sitter-swift/src/tree_sitter/array.h
Normal file
290
unified/extractor/tree-sitter-swift/src/tree_sitter/array.h
Normal file
@@ -0,0 +1,290 @@
|
||||
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "./alloc.h"

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// Silence "unused variable" warnings for the locals declared by the macros
// below; the matching re-enable/pop is at the bottom of this header.
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif

/// A growable array of `T`, laid out as {contents, size, capacity} so that
/// any instantiation can be cast to the untyped `Array` for the helpers below.
#define Array(T)       \
  struct {             \
    T *contents;       \
    uint32_t size;     \
    uint32_t capacity; \
  }

/// Initialize an array.
#define array_init(self) \
  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)

/// Create an empty array.
/// NOTE: expands to a brace initializer, so it is only usable where an
/// initializer is valid (e.g. `Array(int) a = array_new();`).
#define array_new() \
  { NULL, 0, 0 }

/// Get a pointer to the element at a given `index` in the array.
/// Asserts that `index` is in bounds.
#define array_get(self, _index) \
  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])

/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)

/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)

/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)

/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)

/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))

/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
  do { \
    if ((count) == 0) break; \
    _array__grow((Array *)(self), count, array_elem_size(self)); \
    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
    (self)->size += (count); \
  } while (0)

/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
  array_extend((self), (other)->size, (other)->contents)

/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), (self)->size, \
    0, count, contents \
  )

/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), _index, \
    old_count, new_count, new_contents \
  )

/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))

/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
  _array__erase((Array *)(self), array_elem_size(self), _index)

/// Pop the last element off the array, returning the element by value.
/// NOTE: does not check for emptiness; popping an empty array underflows.
#define array_pop(self) ((self)->contents[--(self)->size])

/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))

/// Swap one array with another
#define array_swap(self, other) \
  _array__swap((Array *)(self), (Array *)(other))

/// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents)

/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  _array__search_sorted(self, 0, compare, , needle, _index, _exists)

/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)

/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

// Private

// Untyped view of any Array(T); the generic helpers below operate on this.
typedef Array(void) Array;
|
||||
|
||||
/// This is not what you're looking for, see `array_delete`.
|
||||
static inline void _array__delete(Array *self) {
|
||||
if (self->contents) {
|
||||
ts_free(self->contents);
|
||||
self->contents = NULL;
|
||||
self->size = 0;
|
||||
self->capacity = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_erase`.
|
||||
static inline void _array__erase(Array *self, size_t element_size,
|
||||
uint32_t index) {
|
||||
assert(index < self->size);
|
||||
char *contents = (char *)self->contents;
|
||||
memmove(contents + index * element_size, contents + (index + 1) * element_size,
|
||||
(self->size - index - 1) * element_size);
|
||||
self->size--;
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_reserve`.
|
||||
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
|
||||
if (new_capacity > self->capacity) {
|
||||
if (self->contents) {
|
||||
self->contents = ts_realloc(self->contents, new_capacity * element_size);
|
||||
} else {
|
||||
self->contents = ts_malloc(new_capacity * element_size);
|
||||
}
|
||||
self->capacity = new_capacity;
|
||||
}
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_assign`.
|
||||
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
|
||||
_array__reserve(self, element_size, other->size);
|
||||
self->size = other->size;
|
||||
memcpy(self->contents, other->contents, self->size * element_size);
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_swap`.
|
||||
static inline void _array__swap(Array *self, Array *other) {
|
||||
Array swap = *other;
|
||||
*other = *self;
|
||||
*self = swap;
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
|
||||
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
|
||||
uint32_t new_size = self->size + count;
|
||||
if (new_size > self->capacity) {
|
||||
uint32_t new_capacity = self->capacity * 2;
|
||||
if (new_capacity < 8) new_capacity = 8;
|
||||
if (new_capacity < new_size) new_capacity = new_size;
|
||||
_array__reserve(self, element_size, new_capacity);
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace `old_count` elements at `index` with `new_count` elements copied
/// from `elements` (or zero-filled when `elements` is NULL), shifting the
/// tail as needed. This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
                                  uint32_t index, uint32_t old_count,
                                  uint32_t new_count, const void *elements) {
  uint32_t new_size = self->size + new_count - old_count;
  uint32_t old_end = index + old_count;
  uint32_t new_end = index + new_count;
  // The replaced range must lie entirely within the current contents.
  assert(old_end <= self->size);

  // Grow first: the buffer must hold the final size before the tail moves.
  _array__reserve(self, element_size, new_size);

  char *contents = (char *)self->contents;
  // Shift the tail (everything after the replaced range) to its new offset.
  // memmove, not memcpy: source and destination may overlap.
  if (self->size > old_end) {
    memmove(
      contents + new_end * element_size,
      contents + old_end * element_size,
      (self->size - old_end) * element_size
    );
  }
  // Fill the gap from `elements`, or zero it when no source was given.
  if (new_count > 0) {
    if (elements) {
      memcpy(
        (contents + index * element_size),
        elements,
        new_count * element_size
      );
    } else {
      memset(
        (contents + index * element_size),
        0,
        new_count * element_size
      );
    }
  }
  self->size += new_count - old_count;
}
|
||||
|
||||
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// `suffix` is pasted after each element access, letting the `_by` variants
/// compare on a struct field (it is empty for the `_with` variant).
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
  do { \
    *(_index) = start; \
    *(_exists) = false; \
    uint32_t size = (self)->size - *(_index); \
    if (size == 0) break; \
    int comparison; \
    while (size > 1) { \
      uint32_t half_size = size / 2; \
      uint32_t mid_index = *(_index) + half_size; \
      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
      if (comparison <= 0) *(_index) = mid_index; \
      size -= half_size; \
    } \
    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
    if (comparison == 0) *(_exists) = true; \
    else if (comparison < 0) *(_index) += 1; \
  } while (0)

/// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))

// Restore the warning settings changed at the top of this header.
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ARRAY_H_
|
||||
266
unified/extractor/tree-sitter-swift/src/tree_sitter/parser.h
Normal file
266
unified/extractor/tree-sitter-swift/src/tree_sitter/parser.h
Normal file
@@ -0,0 +1,266 @@
|
||||
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

// Reserved symbol ids and the fixed size of the external-scanner state buffer.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

// Only declare these when the public API header hasn't already done so.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif

// Maps one child of a production to a named field.
typedef struct {
  TSFieldId field_id;
  uint8_t child_index;
  bool inherited;
} TSFieldMapEntry;

// Index/length pair selecting a run of TSFieldMapEntry for one production.
typedef struct {
  uint16_t index;
  uint16_t length;
} TSFieldMapSlice;

typedef struct {
  bool visible;
  bool named;
  bool supertype;
} TSSymbolMetadata;

typedef struct TSLexer TSLexer;

// Function-pointer interface through which generated lex functions and
// external scanners consume input (see the lexer macros below, which call
// `advance`, `mark_end`, and read `lookahead`).
struct TSLexer {
  int32_t lookahead;
  TSSymbol result_symbol;
  void (*advance)(TSLexer *, bool);
  void (*mark_end)(TSLexer *);
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
  void (*log)(const TSLexer *, const char *, ...);
};

typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;

// One LR parse action. The leading `uint8_t type` of each variant overlays
// the bare `type` member, acting as the union's discriminant.
typedef union {
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  uint8_t type;
} TSParseAction;

typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;

// Parse-table cell: either an action, or an entry header giving the count of
// actions that follow it.
typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;

// Inclusive range of Unicode code points (used by `set_contains` below).
typedef struct {
  int32_t start;
  int32_t end;
} TSCharacterRange;

// The generated language definition: parse tables, symbol/field metadata,
// lex functions, and the optional external-scanner hooks. Field order and
// types are part of the runtime ABI — do not reorder.
struct TSLanguage {
  uint32_t version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;
  const TSFieldMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
};
|
||||
/// Binary-search a sorted list of `len` character ranges for `lookahead`.
/// Returns true when `lookahead` falls inside any range (inclusive bounds).
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  uint32_t lo = 0;
  uint32_t span = len - lo;
  while (span > 1) {
    uint32_t half = span / 2;
    uint32_t mid = lo + half;
    TSCharacterRange *candidate = &ranges[mid];
    if (lookahead >= candidate->start && lookahead <= candidate->end) {
      return true;
    }
    // Keep `lo` at the last range starting at or below `lookahead`.
    if (lookahead > candidate->end) {
      lo = mid;
    }
    span -= half;
  }
  TSCharacterRange *last = &ranges[lo];
  return lookahead >= last->start && lookahead <= last->end;
}
|
||||
|
||||
/*
 * Lexer Macros
 *
 * These expand inside the generated `ts_lex` / keyword-lex functions. They
 * rely on locals (`state`, `lexer`) and labels (`start`, `next_state`)
 * declared by START_LEXER, so they are only meaningful in that context.
 */

#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif

#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  UNUSED                        \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;

#define ADVANCE(state_value) \
  { \
    state = state_value; \
    goto next_state; \
  }

// Dispatch on `lookahead` via a flat (char, next-state) pair table.
#define ADVANCE_MAP(...) \
  { \
    static const uint16_t map[] = { __VA_ARGS__ }; \
    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
      if (map[i] == lookahead) { \
        state = map[i + 1]; \
        goto next_state; \
      } \
    } \
  }

#define SKIP(state_value) \
  { \
    skip = true; \
    state = state_value; \
    goto next_state; \
  }

// Record a token but keep lexing: a longer match may still supersede it.
#define ACCEPT_TOKEN(symbol_value) \
  result = true; \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

#define END_STATE() return result;

/*
 * Parse Table Macros
 *
 * These build TSParseActionEntry initializers for the generated tables.
 */

#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

#define STATE(id) id

#define ACTIONS(id) id

#define SHIFT(state_value) \
  {{ \
    .shift = { \
      .type = TSParseActionTypeShift, \
      .state = (state_value) \
    } \
  }}

#define SHIFT_REPEAT(state_value) \
  {{ \
    .shift = { \
      .type = TSParseActionTypeShift, \
      .state = (state_value), \
      .repetition = true \
    } \
  }}

#define SHIFT_EXTRA() \
  {{ \
    .shift = { \
      .type = TSParseActionTypeShift, \
      .extra = true \
    } \
  }}

#define REDUCE(symbol_name, children, precedence, prod_id) \
  {{ \
    .reduce = { \
      .type = TSParseActionTypeReduce, \
      .symbol = symbol_name, \
      .child_count = children, \
      .dynamic_precedence = precedence, \
      .production_id = prod_id \
    }, \
  }}

#define RECOVER() \
  {{ \
    .type = TSParseActionTypeRecover \
  }}

#define ACCEPT_INPUT() \
  {{ \
    .type = TSParseActionTypeAccept \
  }}

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_PARSER_H_
|
||||
39
unified/extractor/tree-sitter-swift/tree-sitter.json
Normal file
39
unified/extractor/tree-sitter-swift/tree-sitter.json
Normal file
@@ -0,0 +1,39 @@
|
||||
{
|
||||
"grammars": [
|
||||
{
|
||||
"name": "swift",
|
||||
"camelcase": "Swift",
|
||||
"scope": "source.swift",
|
||||
"path": ".",
|
||||
"file-types": [
|
||||
"swift"
|
||||
],
|
||||
"highlights": "queries/highlights.scm",
|
||||
"injections": "queries/injections.scm",
|
||||
"locals": "queries/locals.scm",
|
||||
"injection-regex": "swift"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"version": "0.7.2",
|
||||
"license": "MIT",
|
||||
"description": "A tree-sitter grammar for the Swift programming language.",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Alex Pinkus",
|
||||
"email": "alex.pinkus@gmail.com"
|
||||
}
|
||||
],
|
||||
"links": {
|
||||
"repository": "git+https://github.com/alex-pinkus/tree-sitter-swift.git"
|
||||
}
|
||||
},
|
||||
"bindings": {
|
||||
"c": true,
|
||||
"go": true,
|
||||
"node": true,
|
||||
"python": true,
|
||||
"rust": true,
|
||||
"swift": true
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user