mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Python: Copy Python extractor to codeql repo
This commit is contained in:
1
python/extractor/tsg-python/.gitignore
vendored
Normal file
1
python/extractor/tsg-python/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
target/
|
||||
16
python/extractor/tsg-python/BUILD.bazel
Normal file
16
python/extractor/tsg-python/BUILD.bazel
Normal file
@@ -0,0 +1,16 @@
|
||||
load("@tsg_python_crate_index//:defs.bzl", "aliases", "all_crate_deps")
|
||||
load("//:common.bzl", "codeql_rust_binary")
|
||||
|
||||
codeql_rust_binary(
|
||||
name = "tsg-python",
|
||||
srcs = ["src/main.rs"],
|
||||
aliases = aliases(),
|
||||
data = ["python.tsg"],
|
||||
proc_macro_deps = all_crate_deps(
|
||||
proc_macro = True,
|
||||
),
|
||||
visibility = ["//visibility:public"],
|
||||
deps = all_crate_deps(
|
||||
normal = True,
|
||||
) + ["//extractor-python/tsg-python/tree-sitter-python"],
|
||||
)
|
||||
2346
python/extractor/tsg-python/Cargo.Bazel.lock
Normal file
2346
python/extractor/tsg-python/Cargo.Bazel.lock
Normal file
File diff suppressed because it is too large
Load Diff
331
python/extractor/tsg-python/Cargo.lock
generated
Normal file
331
python/extractor/tsg-python/Cargo.lock
generated
Normal file
@@ -0,0 +1,331 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e"
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.70"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.33.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
"atty",
|
||||
"bitflags",
|
||||
"strsim",
|
||||
"textwrap",
|
||||
"unicode-width",
|
||||
"vec_map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9f5105d4fdaab20335ca9565e106a5d9b82b6219b5ba735731124ac6711d23d"
|
||||
dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.136"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
|
||||
|
||||
[[package]]
|
||||
name = "string-interner"
|
||||
version = "0.12.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "383196d1876517ee6f9f0864d1fc1070331b803335d3c6daaa04bbcccd823c08"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.76"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6f107db402c2c2055242dbf4d2af0e69197202e9faacbef9571bbe47f5a1b84"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "602eca064b2d83369e2b2f34b09c70b605402801927c65c11071ac911d299b88"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bad553cc2c78e8de258400763a647e80e6d1b31ee237275d756f6836d204494c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.20.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e34327f8eac545e3f037382471b2b19367725a242bba7bc45edb9efb49fe39a"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-graph"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "639d21e886f581d293de5f5081f09af003c54607ff3fa85efa159b243ba1f97a"
|
||||
dependencies = [
|
||||
"log",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"smallvec",
|
||||
"string-interner",
|
||||
"thiserror",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-python"
|
||||
version = "0.19.0"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tsg-python"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"regex",
|
||||
"smallvec",
|
||||
"string-interner",
|
||||
"thiserror",
|
||||
"tree-sitter",
|
||||
"tree-sitter-graph",
|
||||
"tree-sitter-python",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
26
python/extractor/tsg-python/Cargo.toml
Normal file
26
python/extractor/tsg-python/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[workspace]
|
||||
|
||||
[package]
|
||||
name = "tsg-python"
|
||||
version = "0.1.0"
|
||||
authors = ["Taus Brock-Nannestad <tausbn@github.com>"]
|
||||
edition = "2018"
|
||||
|
||||
# When changing/updating these, the `Cargo.Bazel.lock` file has to be regenerated.
|
||||
# Check out the documentation at https://bazelbuild.github.io/rules_rust/crate_universe.html#repinning--updating-dependencies
|
||||
# for how to do so. The bazel repository for the tsg-python project is called `tsg_python_crate_index`,
|
||||
# and instead of calling `bazel sync`, `./build --bazel sync` should be used instead, to always use the correct bazel version.
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
regex = "1"
|
||||
smallvec = { version="1.6", features=["union"] }
|
||||
thiserror = "1.0"
|
||||
tree-sitter = "0.20.4"
|
||||
tree-sitter-graph = "0.7.0"
|
||||
tree-sitter-python = {path = "tree-sitter-python"}
|
||||
clap = "2.32"
|
||||
|
||||
[dependencies.string-interner]
|
||||
version = "0.12"
|
||||
default-features = false
|
||||
features = ["std", "inline-more", "backends"]
|
||||
202
python/extractor/tsg-python/LICENSE-APACHE
Normal file
202
python/extractor/tsg-python/LICENSE-APACHE
Normal file
@@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
21
python/extractor/tsg-python/LICENSE-MIT
Normal file
21
python/extractor/tsg-python/LICENSE-MIT
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 stack-graphs authors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
624
python/extractor/tsg-python/README.md
Normal file
624
python/extractor/tsg-python/README.md
Normal file
@@ -0,0 +1,624 @@
|
||||
# `tsg-python`
|
||||
|
||||
Run `tree-sitter-graph` queries against Python source files.
|
||||
|
||||
## How to build
|
||||
|
||||
Run `cargo build --release`. The resulting binary can be found in the `target/release` directory.
|
||||
|
||||
## How to invoke
|
||||
|
||||
`tsg-python tsg-file.tsg python-file.py`
|
||||
|
||||
Output is emitted on `stdout`.
|
||||
|
||||
If you're impatient, you can also build and run using `cargo run` followed by the arguments given
|
||||
above.
|
||||
|
||||
## How to use
|
||||
|
||||
To use `tsg-python`, you must have an appropriate `.tsg` file containing the directions for how to
|
||||
construct a Python AST from the output of `tree-sitter-python`.
|
||||
|
||||
### A quick primer on `tree-sitter-graph` syntax
|
||||
|
||||
A file consists of a sequence of stanzas. Each stanza consists of a query (using the [tree-sitter
|
||||
query syntax](https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries)) and a sequence of nodes and edges to define for each query match in the source file.
|
||||
Queries will (almost always) include captures like `@foo`, which means any occurrence of `@foo` in
|
||||
the corresponding stanza will refer to a particular syntax node in the bit that the query matches.
|
||||
|
||||
Stanzas are executed in order, and a stanza is only run when all possible matches have been
|
||||
exhausted for all preceding stanzas. (Since the syntax tree that is matched against never changes,
|
||||
execution never jumps back to an earlier stanza.)
|
||||
|
||||
Inside stanzas, scoped variables have the form `@foo.bar` where `@foo` is a capture in the
|
||||
associated query, and `bar` is an identifier. This should be thought of as a variable that is
|
||||
"attached" to the `tree-sitter` node that `@foo` refers to. If `@baz` is another reference to the same node as
|
||||
`@foo` (perhaps even in a different stanza), then `@baz.bar` will be a reference to the _same_
|
||||
scoped variable. This permits information to be linked across different stanzas.
|
||||
|
||||
Assigning a value to a scoped variable is done using the syntax `let @foo.bar = some-expr` (`let`
|
||||
for immutable variables, `var` for mutable variables, which may be mutated using `set`). Note that
|
||||
scoped variables only exist during the execution of the stack graph, and are not immediately part of
|
||||
the output graph.
|
||||
|
||||
To actually produce output, we must specify some `node`s or `edge`s and possibly `attr`ibutes
|
||||
thereof.
|
||||
|
||||
To produce a node, we declare `node @foo.bar` (which is equivalent to `let @foo.bar = (node)`, the
|
||||
right hand side being a function that creates a new node). In the output, nodes are simply integers.
|
||||
|
||||
To assign an attribute to a node, we write `attr (@foo.bar) identifier = expr`, for some suitable
|
||||
choice of `identifier` and `expr`. In the output, attributes are given alongside nodes in a `key:
|
||||
value` notation.
|
||||
|
||||
For edges and their attributes, the syntax is similar:
|
||||
|
||||
`edge @foo.bar -> @baz.quux`
|
||||
|
||||
and
|
||||
|
||||
`attr (@foo.bar -> @baz.quux) identifier = expr`.
|
||||
|
||||
Note that it is an error to declare the same node, edge, (or attribute of either of these) twice.
|
||||
|
||||
### The general scheme:
|
||||
|
||||
|
||||
For fields that point to some literal value
|
||||
```tsg
|
||||
<some capture involving @nd>
|
||||
{
|
||||
attr (@nd.node) field_name = some_value
|
||||
}
|
||||
```
|
||||
|
||||
For fields that point directly to an AST node:
|
||||
|
||||
```tsg
|
||||
<some capture involving @parent and @child>
|
||||
{
|
||||
attr (@parent.node) field_name = @child.node
|
||||
}
|
||||
```
|
||||
|
||||
For fields that point to lists of AST nodes:
|
||||
|
||||
```tsg
|
||||
<some capture involving @parent and @child>
|
||||
{
|
||||
edge @parent.node -> @child.node
|
||||
attr (@parent.node -> @child.node) field_name = <index of @child in the resulting list>
|
||||
}
|
||||
```
|
||||
|
||||
Scoped variables of the form `@foo.node` are used to tie the AST together, and so it's important
|
||||
that this is set for nodes that map directly onto `tree-sitter-python` nodes. Thus, for instance
|
||||
for binary operators, the stanza could look as follows:
|
||||
|
||||
```tsg
|
||||
(binary_operator
|
||||
left: (_) @left
|
||||
right: (_) @right
|
||||
) @bin
|
||||
{
|
||||
attr (@bin.node) left = @left.node
|
||||
attr (@bin.node) right = @right.node
|
||||
}
|
||||
```
|
||||
|
||||
Note in particular the `@left.node` and `@right.node` references. In order for the above stanza to
|
||||
work, these scoped variables _must_ exist and point to suitable graph `node`s.
|
||||
|
||||
In practice, the setting up of all of these scoped variables (and creation of output graph nodes)
|
||||
will happen at the very top of the `.tsg` file, to ensure that these scoped variables are defined
|
||||
for the remainder of the file.
|
||||
|
||||
To ease the creation of these variables, we have the `ast-node` convenience function. For binary
|
||||
operators, it would take the following form:
|
||||
|
||||
```tsg
|
||||
(binary_operator) @bin
|
||||
{
|
||||
let @bin.node = (ast-node @bin "BinOp")
|
||||
}
|
||||
```
|
||||
Here, the two arguments are respectively
|
||||
- a `tree-sitter` node (which is used to set the location of `@bin.node`), and
|
||||
- a string (which is used to set the "kind" of `@bin.node`)
|
||||
|
||||
In effect, the call
|
||||
|
||||
```tsg
|
||||
let @bin.node = (ast-node @bin "BinOp")
|
||||
```
|
||||
|
||||
is exactly equivalent to the more verbose
|
||||
|
||||
```tsg
|
||||
node @bin.node ; or equivalently `let @bin.node = (node)`
|
||||
attr (@bin.node) _location = (location @bin)
|
||||
attr (@bin.node) _kind = "BinOp"
|
||||
```
|
||||
|
||||
As the above suggests, attributes that start with an underscore are interpreted in a special way
|
||||
when reconstructing the AST.
|
||||
|
||||
### Special attributes
|
||||
|
||||
#### The `_kind` attribute (mandatory)
|
||||
Should be set to a string consisting of the name of the corresponding Python AST class. This
|
||||
information will be used to build the AST, and so it is an error if this is left out.
|
||||
|
||||
Generally, this (and `_location`) will be set using the `ast-node` function.
|
||||
|
||||
#### The `_skip_to` attribute (optional)
|
||||
This is used to indicate that the present graph node should _not_ be turned into an AST node, but that the
|
||||
graph node contained in this attribute should be used instead. That graph node may _also_ contain a
|
||||
`_skip_to` field, in which case the entire chain is followed until a node is encountered that does
|
||||
not have a `_skip_to` field. (Please ensure that there are no cycles of `_skip_to` pointers.)
|
||||
|
||||
Example:
|
||||
|
||||
In `tree-sitter-python`, assignment statements are a form of `expression_statement`, and this node
|
||||
type also encompasses things like expressions (e.g. `2+2`) appearing at the level of statements. In
|
||||
the internal Python AST, we need to separate the assignment from such expressions. The assignment should be present as an `Assign` node, but `2+2` should be
|
||||
wrapped in an `Expr` node. To solve this, we create an `Expr` for each `expression_statement`, and
|
||||
then explicitly skip this node in the AST if it contains an `assignment`. This is implemented as
|
||||
follows:
|
||||
```tsg
|
||||
(expression_statement (assignment) @inner) @outer
|
||||
{
|
||||
attr (@outer.node) _skip_to = @inner.node
|
||||
}
|
||||
```
|
||||
|
||||
#### The `_location` attribute (optional)
|
||||
This attribute is used to indicate the location of the corresponding AST node. As with `_kind` it
|
||||
should be set using the `ast-node` function.
|
||||
|
||||
#### The `_location_start` and `_location_end` attributes (optional)
|
||||
These attributes are used to indicate the start or end of the location of the AST node. They can be
|
||||
used for nodes where `_location` has already been set, in which case they override the relevant part
|
||||
of that location. For an example of this see the worked example on `if` statements below.
|
||||
#### The `_start_line`, `_start_column`, `_end_line`, and `_end_column` attributes (optional)
|
||||
These can be used to set the start or end position of an AST node with even greater detail than the
|
||||
preceding attributes. As with the `_location_start` and `_location_end` attributes, these will
|
||||
override the values of the corresponding part of the location.
|
||||
|
||||
In general, these attributes should be used sparingly, as they are quite verbose.
|
||||
|
||||
### Built-in functions
|
||||
#### `(source-text` _`tree-sitter-node`_`)` (built-in)
|
||||
This function returns the source text of the `tree-sitter` node it receives as an argument.
|
||||
|
||||
Example:
|
||||
|
||||
Extracting the operator from a binary expression:
|
||||
```tsg
|
||||
(binary_operator
|
||||
operator: _ @op
|
||||
) @bin
|
||||
{
|
||||
attr (@bin.node) op = (source-text @op)
|
||||
}
|
||||
```
|
||||
|
||||
#### `(ast-node` _`tree-sitter-node`_ _`string`_`)` (`tsg-python` only)
|
||||
Creates a new graph node with the given `_kind` and sets the `_location` attribute to the location
|
||||
of the given `tree-sitter` node.
|
||||
#### `(child-index` _`tree-sitter-node`_`)` (built-in)
|
||||
Returns the index of the given `tree-sitter` node in its parent.
|
||||
#### `(location` _`tree-sitter-node`_`)` (`tsg-python` only)
|
||||
Returns the location of the given `tree-sitter` node as a list containing four integers
|
||||
corresponding to the start row and column, followed by the end row and column.
|
||||
#### `(location-start` _`tree-sitter-node`_`)` and `(location-end` _`tree-sitter-node`_`)` (`tsg-python` only)
|
||||
Returns the start or end position (row followed by column) of the given `tree-sitter` node (as a list containing two integers).
|
||||
#### `start-row`, `start-column`, `end-row`, and `end-column` (built-in)
|
||||
(All of these take a `tree-sitter-node` as an argument.)
|
||||
|
||||
Returns an integer corresponding to the appropriate part of the location of the given `tree-sitter` node.
|
||||
|
||||
### A worked example: `if` statements
|
||||
|
||||
The way the current parser handles `if` statements means we cannot do a straight mapping from the tree-sitter grammar to the AST. In particular, a block of code such as
|
||||
|
||||
```python
|
||||
if x: do_x
|
||||
elif y: do_y
|
||||
elif z: do_z
|
||||
else: do_else
|
||||
```
|
||||
|
||||
is unrolled into the following form by the current parser:
|
||||
|
||||
```python
|
||||
if x: do_x
|
||||
else:
|
||||
if y: do_y
|
||||
else:
|
||||
if z: do_z
|
||||
else: do_else
|
||||
```
|
||||
|
||||
This means we have to synthesise nodes for the inner `if` statements.
|
||||
|
||||
However, this should be straightforward -- we simply have to make sure that `elif_clause`s also
|
||||
produce the appropriate kind of node, and that everything is linked up correctly.
|
||||
|
||||
For reference, here are the productions for `if_statement`, `else_clause`, and `elif_clause` in
|
||||
`tree-sitter-python`
|
||||
|
||||
```javascript
|
||||
if_statement: $ => seq(
|
||||
'if',
|
||||
field('condition', $.expression),
|
||||
':',
|
||||
field('consequence', $._suite),
|
||||
repeat(field('alternative', $.elif_clause)),
|
||||
optional(field('alternative', $.else_clause))
|
||||
),
|
||||
|
||||
elif_clause: $ => seq(
|
||||
'elif',
|
||||
field('condition', $.expression),
|
||||
':',
|
||||
field('consequence', $._suite)
|
||||
),
|
||||
|
||||
else_clause: $ => seq(
|
||||
'else',
|
||||
':',
|
||||
field('body', $._suite)
|
||||
),
|
||||
```
|
||||
|
||||
First, we'll set up all of the relevant nodes with corresponding nodes in the AST:
|
||||
|
||||
```tsg
|
||||
|
||||
(if_statement)
|
||||
@tree_sitter_node
|
||||
{
|
||||
let @tree_sitter_node.node = (ast-node @tree_sitter_node "If")
|
||||
}
|
||||
```
|
||||
|
||||
This ensures that we can reference the `.node` scoped variable on the above nodes.
|
||||
|
||||
(We named the capture `@tree_sitter_node` above to make it more clear, but in general something like
|
||||
`@if` would be more appropriate.)
|
||||
|
||||
In particular, since we want `elif`s to be turned into nested `if`s, it makes sense to apply the
|
||||
`If` kind to `elif_clauses` as well:
|
||||
|
||||
```tsg
|
||||
(elif_clause) @elif
|
||||
{
|
||||
let @elif.node = (ast-node @elif "If")
|
||||
}
|
||||
```
|
||||
Whenever we refer to a node, we must ensure that it has first been defined; however, there is no
|
||||
need to do this separately for each node.
|
||||
|
||||
Next, for both `if`s and `elif`s, we want to record the `test` and the `body`. The `test` we do as follows:
|
||||
|
||||
```tsg
|
||||
[
|
||||
(if_statement
|
||||
condition: (_) @test) @if
|
||||
(elif_clause
|
||||
condition: (_) @test) @if
|
||||
]
|
||||
{
|
||||
attr (@if.node) test = @test.node
|
||||
}
|
||||
```
|
||||
For `body`, in the Python AST this is simply a list of nodes, whereas for the `tree-sitter` parse tree, it
|
||||
will contain a `block` node. Because there is no Python AST equivalent for `block`, we skip over
|
||||
this node when linking the `if`-statement to its body:
|
||||
```tsg
|
||||
[
|
||||
(if_statement
|
||||
consequence: (block (_) @stmt)) @parent
|
||||
(elif_clause
|
||||
consequence: (block (_) @stmt)) @parent
|
||||
]
|
||||
{
|
||||
edge @parent.node -> @stmt.node
|
||||
attr (@parent.node -> @stmt.node) body = (child-index @stmt)
|
||||
}
|
||||
```
|
||||
The above shows how we handle fields containing lists of items: we add an edge from the parent node
|
||||
to each child node, and put an attribute on that edge. The name of the attribute will be the name of
|
||||
the field, and the value will be the index of this node among the children of its `tree-sitter` parent.
|
||||
|
||||
Now we can begin unwinding the nesting. First of all, the first `elif` should be the `orelse` of the
|
||||
initial `if_statement`:
|
||||
|
||||
```tsg
|
||||
(if_statement
|
||||
consequence: (_)
|
||||
.
|
||||
(elif_clause) @elif
|
||||
) @if
|
||||
{
|
||||
edge @if.node -> @elif.node
|
||||
attr (@if.node -> @elif.node) orelse = 0
|
||||
}
|
||||
```
|
||||
(The `.` acts as an anchor, forcing its two neighbours to be adjacent in the tree. So in this case,
|
||||
we get the first `elif` after the body of the `if`)
|
||||
|
||||
Next, whenever we have two adjacent `elif`s, we want the `orelse` of the first one to be the second one:
|
||||
|
||||
```tsg
|
||||
(
|
||||
(elif_clause) @elif1
|
||||
.
|
||||
(elif_clause) @elif2
|
||||
)
|
||||
{
|
||||
edge @elif1.node -> @elif2.node
|
||||
attr (@elif1.node -> @elif2.node) orelse = 0
|
||||
}
|
||||
```
|
||||
|
||||
Finally, the `else` branch of the outermost `if` should be the `orelse` of the _last_ `elif`:
|
||||
|
||||
```tsg
|
||||
(if_statement
|
||||
(elif_clause) @elif
|
||||
.
|
||||
alternative: (else_clause body: (block (_) @orelse))
|
||||
)
|
||||
{
|
||||
edge @elif.node -> @orelse.node
|
||||
attr (@elif.node -> @orelse.node) orelse = (child-index @orelse)
|
||||
}
|
||||
```
|
||||
|
||||
The above gives us the correct tree structure, but we're still missing a few bits (such as
|
||||
locations). To capture location information we use the following stanza:
|
||||
```tsg
|
||||
[
|
||||
(if_statement
|
||||
condition: (_)
|
||||
":" @colon) @if
|
||||
(elif_clause
|
||||
condition: (_)
|
||||
":" @colon) @if
|
||||
]
|
||||
{
|
||||
attr (@if.node) _location_end = (location-end @colon)
|
||||
}
|
||||
```
|
||||
Because `tree-sitter-python` disagrees with the Python AST about the location of the `If` node, we
|
||||
have to adjust it. We do this by setting the `_location_end` attribute to the end of the `:` token.
|
||||
(Note that the _start_ of this location was set when we called `ast-node` above. As we don't have to
|
||||
change this part of the location, we simply leave it as is.)
|
||||
|
||||
|
||||
|
||||
### Synthesizing nodes
|
||||
In many cases it will be sufficient to hook up AST nodes to the corresponding `tree-sitter` nodes,
|
||||
but occasionally we want the tree structure to be different. One example of this would be the
|
||||
`class` statement. For instance, a class declaration such as
|
||||
|
||||
```python
|
||||
class Foo(int, object, metaclass=type):
|
||||
x = 5
|
||||
```
|
||||
|
||||
has a `tree-sitter-python` parse tree that looks like this:
|
||||
|
||||
```
|
||||
module [0, 0] - [2, 0]
|
||||
class_definition [0, 0] - [1, 9]
|
||||
name: identifier [0, 6] - [0, 9]
|
||||
superclasses: argument_list [0, 9] - [0, 38]
|
||||
identifier [0, 10] - [0, 13]
|
||||
identifier [0, 15] - [0, 21]
|
||||
keyword_argument [0, 23] - [0, 37]
|
||||
name: identifier [0, 23] - [0, 32]
|
||||
value: identifier [0, 33] - [0, 37]
|
||||
body: block [1, 4] - [1, 9]
|
||||
expression_statement [1, 4] - [1, 9]
|
||||
assignment [1, 4] - [1, 9]
|
||||
left: identifier [1, 4] - [1, 5]
|
||||
right: integer [1, 8] - [1, 9]
|
||||
```
|
||||
|
||||
but the Python AST looks like _this_:
|
||||
|
||||
```
|
||||
Module: [1, 0] - [3, 0]
|
||||
body: [
|
||||
Assign: [1, 0] - [1, 39]
|
||||
targets: [
|
||||
Name: [1, 6] - [1, 9]
|
||||
variable: Variable('Foo', None)
|
||||
ctx: Store
|
||||
]
|
||||
value:
|
||||
ClassExpr: [1, 0] - [1, 39]
|
||||
name: 'Foo'
|
||||
bases: [
|
||||
Name: [1, 10] - [1, 13]
|
||||
variable: Variable('int', None)
|
||||
ctx: Load
|
||||
Name: [1, 15] - [1, 21]
|
||||
variable: Variable('object', None)
|
||||
ctx: Load
|
||||
]
|
||||
keywords: [
|
||||
keyword: [1, 23] - [1, 37]
|
||||
arg: 'metaclass'
|
||||
value:
|
||||
Name: [1, 33] - [1, 37]
|
||||
variable: Variable('type', None)
|
||||
ctx: Load
|
||||
]
|
||||
inner_scope:
|
||||
Class: [1, 0] - [1, 39]
|
||||
name: 'Foo'
|
||||
body: [
|
||||
Assign: [2, 4] - [2, 9]
|
||||
targets: [
|
||||
Name: [2, 4] - [2, 5]
|
||||
variable: Variable('x', None)
|
||||
ctx: Store
|
||||
]
|
||||
value:
|
||||
Num: [2, 8] - [2, 9]
|
||||
n: 5
|
||||
text: '5'
|
||||
]
|
||||
]
|
||||
```
|
||||
|
||||
In particular, we unroll the `class` statement into an explicit assignment (which is the top node
|
||||
for this statement in the AST) of a synthetic `ClassExpr`, which in turn contains a `Class` node
|
||||
(which holds things like the body of the class). This requires too many nodes to simply reuse what's given to
|
||||
us by `tree-sitter-python`, and so we must _synthesize_ additional nodes.
|
||||
|
||||
First of all, let us set up the outer node to be an `Assign` node:
|
||||
```tsg
|
||||
(class_definition) @class
|
||||
{
|
||||
let @class.node = (ast-node @class "Assign")
|
||||
}
|
||||
```
|
||||
|
||||
Next, we can do most of the work in a single stanza:
|
||||
|
||||
```tsg
|
||||
(class_definition
|
||||
name: (identifier) @name
|
||||
":" @colon
|
||||
) @class
|
||||
{
|
||||
|
||||
; To make it clearer that the outer node is an assignment, we create an alias for it.
|
||||
let @class.assign = @class.node
|
||||
|
||||
; Synthesized nodes: the left-hand side of the assignment, the class_expr node, and the class
|
||||
; node.
|
||||
|
||||
let @class.assign_lhs = (ast-node @name "Name")
|
||||
let @class.class_expr = (ast-node @class "ClassExpr")
|
||||
let @class.inner_scope = (ast-node @class "Class")
|
||||
|
||||
edge @class.assign -> @class.assign_lhs
|
||||
attr (@class.assign -> @class.assign_lhs) targets = 0
|
||||
attr (@class.assign) value = @class.class_expr
|
||||
attr (@class.assign) _location_end = (location-end @colon)
|
||||
|
||||
let class_name = (source-text @name)
|
||||
|
||||
; The left-hand side of the assignment, a `Name`.
|
||||
attr (@class.assign_lhs) variable = class_name
|
||||
attr (@class.assign_lhs) ctx = "store"
|
||||
|
||||
; The right hand side of the assignment, a `ClassExpr`.
|
||||
attr (@class.class_expr) name = class_name
|
||||
attr (@class.class_expr) inner_scope = @class.inner_scope
|
||||
; `bases` will be set elsewhere
|
||||
; `keywords` will be set elsewhere
|
||||
attr (@class.class_expr) _location_end = (location-end @colon)
|
||||
|
||||
; The inner scope of the class_expr, a `Class`.
|
||||
attr (@class.inner_scope) name = class_name
|
||||
; body will be set in a separate stanza.
|
||||
attr (@class.inner_scope) _location_end = (location-end @colon)
|
||||
|
||||
}
|
||||
```
|
||||
|
||||
Let's go over these lines bit by bit. First, we create an alias for the outermost node (which will
|
||||
become an assignment node) in order to make it clearer that it's an assignment. Next, we create
|
||||
_new_ nodes for the inner synthesized nodes. Note that we can't assign these to `@class.node` as
|
||||
that already points to the node that will become the assignment node. Instead, we create new scoped
|
||||
variables (with suitable names), and assign them nodes (with appropriate kinds and locations using
|
||||
`ast-node`).
|
||||
```tsg
|
||||
; To make it clearer that the outer node is an assignment, we create an alias for it.
|
||||
let @class.assign = @class.node
|
||||
|
||||
; Synthesized nodes: the left-hand side of the assignment, the class_expr node, and the class
|
||||
; node.
|
||||
|
||||
let @class.assign_lhs = (ast-node @name "Name")
|
||||
let @class.class_expr = (ast-node @class "ClassExpr")
|
||||
let @class.inner_scope = (ast-node @class "Class")
|
||||
```
|
||||
|
||||
Next, we set up the outer assignment:
|
||||
```tsg
|
||||
edge @class.assign -> @class.assign_lhs
|
||||
attr (@class.assign -> @class.assign_lhs) targets = 0
|
||||
attr (@class.assign) value = @class.class_expr
|
||||
attr (@class.assign) _location_end = (location-end @colon)
|
||||
```
|
||||
|
||||
The remaining nodes all contain a field that refers to the name of the class, so put this in a local
|
||||
variable for convenience:
|
||||
```tsg
|
||||
let class_name = (source-text @name)
|
||||
```
|
||||
We set up the left hand side of the assignment:
|
||||
```tsg
|
||||
; The left-hand side of the assignment, a `Name`.
|
||||
attr (@class.assign_lhs) variable = class_name
|
||||
attr (@class.assign_lhs) ctx = "store"
|
||||
```
|
||||
The `ClassExpr`:
|
||||
```tsg
|
||||
; The right hand side of the assignment, a `ClassExpr`.
|
||||
attr (@class.class_expr) name = class_name
|
||||
attr (@class.class_expr) inner_scope = @class.inner_scope
|
||||
; `bases` will be set elsewhere
|
||||
; `keywords` will be set elsewhere
|
||||
attr (@class.class_expr) _location_end = (location-end @colon)
|
||||
```
|
||||
|
||||
The `Class`:
|
||||
```tsg
|
||||
; The inner scope of the class_expr, a `Class`.
|
||||
attr (@class.inner_scope) name = class_name
|
||||
; body will be set elsewhere
|
||||
attr (@class.inner_scope) _location_end = (location-end @colon)
|
||||
|
||||
```
|
||||
|
||||
The remaining stanzas take care of setting up the fields that contain lists of nodes, and these
|
||||
follow the same scheme as before.
|
||||
```tsg
|
||||
; Class.body
|
||||
(class_definition
|
||||
body: (block (_) @stmt)
|
||||
) @class
|
||||
{
|
||||
edge @class.inner_scope -> @stmt.node
|
||||
attr (@class.inner_scope -> @stmt.node) body = (child-index @stmt)
|
||||
}
|
||||
|
||||
; Class.bases
|
||||
(class_definition
|
||||
superclasses: (argument_list (identifier) @arg)
|
||||
) @class
|
||||
{
|
||||
edge @class.class_expr -> @arg.node
|
||||
attr (@class.class_expr -> @arg.node) bases = (child-index @arg)
|
||||
attr (@arg.node) ctx = "load"
|
||||
}
|
||||
|
||||
; Class.keywords
|
||||
(class_definition
|
||||
superclasses: (argument_list (keyword_argument) @arg)
|
||||
) @class
|
||||
{
|
||||
edge @class.class_expr -> @arg.node
|
||||
attr (@class.class_expr -> @arg.node) keywords = (child-index @arg)
|
||||
}
|
||||
```
|
||||
3472
python/extractor/tsg-python/python.tsg
Normal file
3472
python/extractor/tsg-python/python.tsg
Normal file
File diff suppressed because it is too large
Load Diff
7
python/extractor/tsg-python/rust-toolchain.toml
Normal file
7
python/extractor/tsg-python/rust-toolchain.toml
Normal file
@@ -0,0 +1,7 @@
|
||||
# This file specifies the Rust version used to develop and test the Python
|
||||
# extractor. It is set to the lowest version of Rust we want to support.
|
||||
|
||||
[toolchain]
|
||||
channel = "1.68"
|
||||
profile = "minimal"
|
||||
components = [ "rustfmt" ]
|
||||
572
python/extractor/tsg-python/src/main.rs
Normal file
572
python/extractor/tsg-python/src/main.rs
Normal file
@@ -0,0 +1,572 @@
|
||||
// -*- coding: utf-8 -*-
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Copyright © 2021, GitHub.
|
||||
// Licensed under either of Apache License, Version 2.0, or MIT license, at your option.
|
||||
// Please see the LICENSE-APACHE or LICENSE-MIT files in this distribution for license details.
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use anyhow::Context as _;
|
||||
use anyhow::Result;
|
||||
use clap::App;
|
||||
use clap::Arg;
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter_graph::ast::File;
|
||||
use tree_sitter_graph::functions::Functions;
|
||||
use tree_sitter_graph::ExecutionConfig;
|
||||
use tree_sitter_graph::Identifier;
|
||||
use tree_sitter_graph::NoCancellation;
|
||||
use tree_sitter_graph::Variables;
|
||||
|
||||
const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
pub mod extra_functions {
|
||||
use tree_sitter_graph::functions::{Function, Parameters};
|
||||
use tree_sitter_graph::graph::{Graph, Value};
|
||||
use tree_sitter_graph::{ExecutionError, Identifier};
|
||||
|
||||
pub struct Location;
|
||||
|
||||
fn get_location(node: Value, graph: &Graph) -> Result<Value, ExecutionError> {
|
||||
let node = graph[node.into_syntax_node_ref()?];
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
Ok(Value::List(
|
||||
vec![start.row, start.column, end.row, end.column]
|
||||
.into_iter()
|
||||
.map(|v| Value::from(v as u32))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
impl Function for Location {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = parameters.param()?;
|
||||
parameters.finish()?;
|
||||
get_location(node, graph)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LocationStart;
|
||||
|
||||
impl Function for LocationStart {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let start = node.start_position();
|
||||
Ok(Value::List(
|
||||
vec![start.row, start.column]
|
||||
.into_iter()
|
||||
.map(|v| Value::from(v as u32))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LocationEnd;
|
||||
|
||||
impl Function for LocationEnd {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let end = node.end_position();
|
||||
Ok(Value::List(
|
||||
vec![end.row, end.column]
|
||||
.into_iter()
|
||||
.map(|v| Value::from(v as u32))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AstNode;
|
||||
|
||||
impl Function for AstNode {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let tree_sitter_node = parameters.param()?;
|
||||
let kind = parameters.param()?;
|
||||
parameters.finish()?;
|
||||
let node = graph.add_graph_node();
|
||||
let loc = get_location(tree_sitter_node, graph)?;
|
||||
graph[node]
|
||||
.attributes
|
||||
.add(Identifier::from("_location"), loc)
|
||||
.map_err(|_| {
|
||||
ExecutionError::DuplicateAttribute(format!(
|
||||
" _location on graph node ({:?})",
|
||||
node
|
||||
))
|
||||
})?;
|
||||
graph[node]
|
||||
.attributes
|
||||
.add(Identifier::from("_kind"), kind)
|
||||
.map_err(|_| {
|
||||
ExecutionError::DuplicateAttribute(format!(" _kind on graph node ({:?})", node))
|
||||
})?;
|
||||
Ok(Value::GraphNode(node))
|
||||
}
|
||||
}
|
||||
|
||||
/// A struct representing the prefix on a Python string.
struct Prefix {
    flags: String,
    quotes: String,
}

impl Prefix {
    /// The complete prefix: the flag characters followed by the opening quotes.
    fn full(&self) -> String {
        let mut combined = self.flags.clone();
        combined.push_str(&self.quotes);
        combined
    }

    /// A copy of this prefix with any f-string flags (`f`/`F`) removed.
    fn safe(&self) -> Prefix {
        Prefix {
            flags: self.flags.replace("f", "").replace("F", ""),
            quotes: self.quotes.clone(),
        }
    }
}
|
||||
fn get_prefix(s: &str) -> Prefix {
|
||||
let flags_matcher = regex::Regex::new("^[bfurBFUR]{0,2}").unwrap();
|
||||
let mut end = 0;
|
||||
let flags = match flags_matcher.find(s) {
|
||||
Some(m) => {
|
||||
end = m.end();
|
||||
&s[m.start()..m.end()]
|
||||
}
|
||||
None => "",
|
||||
};
|
||||
let mut quotes = "";
|
||||
if s[end..].starts_with("\"\"\"") {
|
||||
quotes = "\"\"\"";
|
||||
} else if s[end..].starts_with("'''") {
|
||||
quotes = "'''";
|
||||
} else if s[end..].starts_with('"') {
|
||||
quotes = "\"";
|
||||
} else if s[end..].starts_with('\'') {
|
||||
quotes = "'";
|
||||
} else if s[end..].starts_with('}') {
|
||||
quotes = "}";
|
||||
}
|
||||
Prefix {
|
||||
flags: flags.to_lowercase().to_owned(),
|
||||
quotes: quotes.to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_get_prefix() {
    // (input literal, expected flags, expected quotes)
    let cases: &[(&str, &str, &str)] = &[
        ("rb'''hello'''", "rb", "'''"),
        ("Br\"\"\"hello\"\"\"", "Br", "\"\"\""),
        ("FR\"hello\"", "FR", "\""),
        ("uR'hello'", "uR", "'"),
        ("''", "", "'"),
        ("\"\"", "", "\""),
        ("\"\"\"\"\"\"", "", "\"\"\""),
    ];
    for &(input, expected_flags, expected_quotes) in cases {
        let prefix = get_prefix(input);
        assert_eq!(prefix.flags, expected_flags);
        assert_eq!(prefix.quotes, expected_quotes);
    }
}
||||
|
||||
fn get_string_contents(s: String) -> String {
|
||||
let prefix = get_prefix(&s);
|
||||
let contents = s.clone();
|
||||
let contents = contents.strip_prefix(prefix.full().as_str()).unwrap();
|
||||
let contents = contents.strip_suffix(prefix.quotes.as_str()).unwrap();
|
||||
|
||||
contents.to_owned()
|
||||
}
|
||||
|
||||
#[test]
fn test_get_string_contents() {
    // (input literal, expected contents)
    let cases: &[(&str, &str)] = &[
        ("rb'''hello'''", "hello"),
        ("Br\"\"\"hello\"\"\"", "hello"),
        ("FR\"hello\"", "hello"),
        ("uR'hello'", "hello"),
        ("''", ""),
        ("\"\"", ""),
        ("\"\"\"\"\"\"", ""),
        ("''''''", ""),
    ];
    for &(input, expected) in cases {
        assert_eq!(get_string_contents(input.to_owned()), expected);
    }
}
||||
|
||||
pub struct StringPrefix;
|
||||
|
||||
impl Function for StringPrefix {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]).full();
|
||||
Ok(Value::String(prefix))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StringContents;
|
||||
|
||||
impl Function for StringContents {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let contents = get_string_contents(source[node.byte_range()].to_owned());
|
||||
Ok(Value::String(contents))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StringQuotes;
|
||||
|
||||
impl Function for StringQuotes {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]);
|
||||
Ok(Value::String(prefix.quotes))
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a version of the prefix that can be used in a call to `literal_eval`. To do so, we must remove
|
||||
// any `f` or `F` characters, if present.
|
||||
pub struct StringSafePrefix;
|
||||
|
||||
impl Function for StringSafePrefix {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]).full();
|
||||
let prefix = prefix.replace("f", "").replace("F", "");
|
||||
Ok(Value::String(prefix))
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a version of the string where `f` and `F` have been stripped from the prefix.
|
||||
pub struct SafeString;
|
||||
|
||||
impl Function for SafeString {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]);
|
||||
let contents = get_string_contents(source[node.byte_range()].to_owned());
|
||||
let s = format!("{}{}{}", prefix.safe().full(), contents, prefix.quotes);
|
||||
Ok(Value::String(s))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UnnamedChildIndex;
|
||||
|
||||
impl Function for UnnamedChildIndex {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let parent = match node.parent() {
|
||||
Some(parent) => parent,
|
||||
None => {
|
||||
return Err(ExecutionError::FunctionFailed(
|
||||
"unnamed-child-index".into(),
|
||||
format!("Cannot call child-index on the root node"),
|
||||
))
|
||||
}
|
||||
};
|
||||
let mut tree_cursor = parent.walk();
|
||||
let index = parent
|
||||
.children(&mut tree_cursor)
|
||||
.position(|child| child == node)
|
||||
.ok_or_else(|| {
|
||||
ExecutionError::FunctionFailed(
|
||||
"unnamed-child-index".into(),
|
||||
format!("Called child-index on a non-named child"),
|
||||
)
|
||||
})?;
|
||||
Ok(Value::Integer(index as u32))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ConcatenateStrings;
|
||||
|
||||
impl Function for ConcatenateStrings {
|
||||
fn call(
|
||||
&self,
|
||||
_graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let mut result = String::new();
|
||||
while let Ok(param) = parameters.param() {
|
||||
let string = param.into_string()?;
|
||||
result.push_str(string.as_str());
|
||||
}
|
||||
Ok(Value::String(result))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct InstanceOf;
|
||||
|
||||
impl Function for InstanceOf {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
let class_name = parameters.param()?.into_string()?;
|
||||
parameters.finish()?;
|
||||
let node_type = node.kind();
|
||||
let class_name = class_name.as_str();
|
||||
let is_instance = node_type == class_name;
|
||||
Ok(Value::Boolean(is_instance))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct GetParent;
|
||||
|
||||
impl Function for GetParent {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let parent = node.parent().ok_or_else(|| {
|
||||
ExecutionError::FunctionFailed(
|
||||
"get-parent".into(),
|
||||
format!("Cannot call get-parent on the root node"),
|
||||
)
|
||||
})?;
|
||||
Ok(Value::SyntaxNode(graph.add_syntax_node(parent)))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HasNamedChild;
|
||||
|
||||
impl Function for HasNamedChild {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
let field_name = parameters.param()?.into_string()?;
|
||||
parameters.finish()?;
|
||||
let field_name = field_name.as_str();
|
||||
let has_named_child = node.child_by_field_name(field_name).is_some();
|
||||
Ok(Value::Boolean(has_named_child))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IsBooleanOperator;
|
||||
|
||||
impl Function for IsBooleanOperator {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
let expected_op_type = parameters.param()?.into_string()?;
|
||||
parameters.finish()?;
|
||||
if let Some(op) = node.child_by_field_name("operator") {
|
||||
let op_type = source[op.byte_range()].to_string();
|
||||
let is_boolean_op = expected_op_type == op_type;
|
||||
Ok(Value::Boolean(is_boolean_op))
|
||||
} else {
|
||||
Ok(Value::Boolean(false))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Modulo;
|
||||
|
||||
impl Function for Modulo {
|
||||
fn call(
|
||||
&self,
|
||||
_graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let left = parameters.param()?.into_integer()?;
|
||||
let right = parameters.param()?.into_integer()?;
|
||||
parameters.finish()?;
|
||||
Ok(Value::Integer(left % right))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let matches = App::new("tsg-python")
|
||||
.version(BUILD_VERSION)
|
||||
.author("Taus Brock-Nannestad <tausbn@github.com>")
|
||||
.about("Extracts a Python AST from the parse tree given by tree-sitter-python")
|
||||
.arg(
|
||||
Arg::with_name("tsg")
|
||||
.short("t")
|
||||
.long("tsg")
|
||||
.takes_value(true)
|
||||
.required(false),
|
||||
)
|
||||
.arg(Arg::with_name("source").index(1).required(true))
|
||||
.get_matches();
|
||||
|
||||
let tsg_path = if matches.is_present("tsg") {
|
||||
Path::new(matches.value_of("tsg").unwrap())
|
||||
.display()
|
||||
.to_string()
|
||||
} else {
|
||||
"bundled `python.tsg`".to_owned()
|
||||
};
|
||||
let source_path = Path::new(matches.value_of("source").unwrap());
|
||||
let language = tree_sitter_python::language();
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language)?;
|
||||
// Statically include `python.tsg`:
|
||||
let tsg = if matches.is_present("tsg") {
|
||||
std::fs::read(&tsg_path).with_context(|| format!("Error reading TSG file {}", tsg_path))?
|
||||
} else {
|
||||
include_bytes!("../python.tsg").to_vec()
|
||||
};
|
||||
let tsg = String::from_utf8(tsg)?;
|
||||
let source = std::fs::read(source_path)
|
||||
.with_context(|| format!("Error reading source file {}", source_path.display()))?;
|
||||
let source = String::from_utf8(source)?;
|
||||
let tree = parser
|
||||
.parse(&source, None)
|
||||
.ok_or_else(|| anyhow!("Could not parse {}", source_path.display()))?;
|
||||
let file = File::from_str(language, &tsg)
|
||||
.with_context(|| anyhow!("Error parsing TSG file {}", tsg_path))?;
|
||||
let mut functions = Functions::stdlib();
|
||||
functions.add(Identifier::from("location"), extra_functions::Location);
|
||||
functions.add(
|
||||
Identifier::from("location-start"),
|
||||
extra_functions::LocationStart,
|
||||
);
|
||||
functions.add(
|
||||
Identifier::from("location-end"),
|
||||
extra_functions::LocationEnd,
|
||||
);
|
||||
functions.add(
|
||||
Identifier::from("string-prefix"),
|
||||
extra_functions::StringPrefix,
|
||||
);
|
||||
functions.add(
|
||||
Identifier::from("string-contents"),
|
||||
extra_functions::StringContents,
|
||||
);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("string-quotes"),
|
||||
extra_functions::StringQuotes,
|
||||
);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("string-safe-prefix"),
|
||||
extra_functions::StringSafePrefix,
|
||||
);
|
||||
|
||||
functions.add(Identifier::from("safe-string"), extra_functions::SafeString);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("unnamed-child-index"),
|
||||
extra_functions::UnnamedChildIndex,
|
||||
);
|
||||
functions.add(Identifier::from("ast-node"), extra_functions::AstNode);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("concatenate-strings"),
|
||||
extra_functions::ConcatenateStrings,
|
||||
);
|
||||
|
||||
functions.add(Identifier::from("instance-of"), extra_functions::InstanceOf);
|
||||
|
||||
functions.add(Identifier::from("get-parent"), extra_functions::GetParent);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("has-named-child"),
|
||||
extra_functions::HasNamedChild,
|
||||
);
|
||||
functions.add(
|
||||
Identifier::from("is-boolean-operator"),
|
||||
extra_functions::IsBooleanOperator,
|
||||
);
|
||||
|
||||
functions.add(Identifier::from("mod"), extra_functions::Modulo);
|
||||
let globals = Variables::new();
|
||||
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
|
||||
let graph = file
|
||||
.execute(&tree, &source, &mut config, &NoCancellation)
|
||||
.with_context(|| format!("Could not execute TSG file {}", tsg_path))?;
|
||||
print!("{}", graph.pretty_print());
|
||||
Ok(())
|
||||
}
|
||||
7
python/extractor/tsg-python/tree-sitter-python/.gitignore
vendored
Normal file
7
python/extractor/tsg-python/tree-sitter-python/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
Cargo.lock
|
||||
package-lock.json
|
||||
node_modules
|
||||
build
|
||||
*.log
|
||||
/examples/*/
|
||||
/target/
|
||||
@@ -0,0 +1,6 @@
|
||||
corpus
|
||||
examples
|
||||
build
|
||||
script
|
||||
target
|
||||
bindings/rust
|
||||
38
python/extractor/tsg-python/tree-sitter-python/BUILD.bazel
Normal file
38
python/extractor/tsg-python/tree-sitter-python/BUILD.bazel
Normal file
@@ -0,0 +1,38 @@
|
||||
load("@rules_rust//cargo:defs.bzl", "cargo_build_script")
|
||||
load("@rules_rust//rust:defs.bzl", "rust_library")
|
||||
load("@tsg_python_crate_index//:defs.bzl", "aliases", "all_crate_deps")
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
# This will run the build script from the root of the workspace, and
|
||||
# collect the outputs.
|
||||
cargo_build_script(
|
||||
name = "tsg-build-script",
|
||||
srcs = ["bindings/rust/build.rs"],
|
||||
data = glob([
|
||||
"src/**",
|
||||
]),
|
||||
deps = all_crate_deps(
|
||||
build = True,
|
||||
),
|
||||
)
|
||||
|
||||
rust_library(
|
||||
name = "tree-sitter-python",
|
||||
srcs = [
|
||||
"bindings/rust/lib.rs",
|
||||
],
|
||||
aliases = aliases(),
|
||||
compile_data = glob([
|
||||
"src/**",
|
||||
"queries/**",
|
||||
]) + [
|
||||
"grammar.js",
|
||||
],
|
||||
proc_macro_deps = all_crate_deps(
|
||||
proc_macro = True,
|
||||
),
|
||||
deps = [":tsg-build-script"] + all_crate_deps(
|
||||
normal = True,
|
||||
),
|
||||
)
|
||||
31
python/extractor/tsg-python/tree-sitter-python/Cargo.toml
Normal file
31
python/extractor/tsg-python/tree-sitter-python/Cargo.toml
Normal file
@@ -0,0 +1,31 @@
|
||||
[package]
|
||||
name = "tree-sitter-python"
|
||||
description = "Python grammar for the tree-sitter parsing library"
|
||||
version = "0.19.0"
|
||||
authors = [
|
||||
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
|
||||
"Douglas Creager <dcreager@dcreager.net>",
|
||||
]
|
||||
license = "MIT"
|
||||
readme = "bindings/rust/README.md"
|
||||
keywords = ["incremental", "parsing", "python"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
repository = "https://github.com/tree-sitter/tree-sitter-python"
|
||||
edition = "2018"
|
||||
|
||||
build = "bindings/rust/build.rs"
|
||||
include = [
|
||||
"bindings/rust/*",
|
||||
"grammar.js",
|
||||
"queries/*",
|
||||
"src/*",
|
||||
]
|
||||
|
||||
[lib]
|
||||
path = "bindings/rust/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
tree-sitter = ">= 0.20, < 0.21"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0"
|
||||
21
python/extractor/tsg-python/tree-sitter-python/LICENSE
Normal file
21
python/extractor/tsg-python/tree-sitter-python/LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Max Brunsfeld
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
13
python/extractor/tsg-python/tree-sitter-python/README.md
Normal file
13
python/extractor/tsg-python/tree-sitter-python/README.md
Normal file
@@ -0,0 +1,13 @@
|
||||
tree-sitter-python
|
||||
==================
|
||||
|
||||
[](https://github.com/tree-sitter/tree-sitter-python/actions/workflows/ci.yml)
|
||||
|
||||
Python grammar for [tree-sitter][].
|
||||
|
||||
[tree-sitter]: https://github.com/tree-sitter/tree-sitter
|
||||
|
||||
#### References
|
||||
|
||||
* [Python 2 Grammar](https://docs.python.org/2/reference/grammar.html)
|
||||
* [Python 3 Grammar](https://docs.python.org/3/reference/grammar.html)
|
||||
19
python/extractor/tsg-python/tree-sitter-python/binding.gyp
Normal file
19
python/extractor/tsg-python/tree-sitter-python/binding.gyp
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "tree_sitter_python_binding",
|
||||
"include_dirs": [
|
||||
"<!(node -e \"require('nan')\")",
|
||||
"src"
|
||||
],
|
||||
"sources": [
|
||||
"src/parser.c",
|
||||
"bindings/node/binding.cc",
|
||||
"src/scanner.cc"
|
||||
],
|
||||
"cflags_c": [
|
||||
"-std=c99",
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
#include "tree_sitter/parser.h"
|
||||
#include <node.h>
|
||||
#include "nan.h"
|
||||
|
||||
using namespace v8;
|
||||
|
||||
extern "C" TSLanguage * tree_sitter_python();
|
||||
|
||||
namespace {
|
||||
|
||||
NAN_METHOD(New) {}
|
||||
|
||||
void Init(Local<Object> exports, Local<Object> module) {
|
||||
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
|
||||
tpl->SetClassName(Nan::New("Language").ToLocalChecked());
|
||||
tpl->InstanceTemplate()->SetInternalFieldCount(1);
|
||||
|
||||
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
|
||||
Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
|
||||
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_python());
|
||||
|
||||
Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("python").ToLocalChecked());
|
||||
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
|
||||
}
|
||||
|
||||
NODE_MODULE(tree_sitter_python_binding, Init)
|
||||
|
||||
} // namespace
|
||||
@@ -0,0 +1,19 @@
|
||||
try {
|
||||
module.exports = require("../../build/Release/tree_sitter_python_binding");
|
||||
} catch (error1) {
|
||||
if (error1.code !== 'MODULE_NOT_FOUND') {
|
||||
throw error1;
|
||||
}
|
||||
try {
|
||||
module.exports = require("../../build/Debug/tree_sitter_python_binding");
|
||||
} catch (error2) {
|
||||
if (error2.code !== 'MODULE_NOT_FOUND') {
|
||||
throw error2;
|
||||
}
|
||||
throw error1
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
module.exports.nodeTypeInfo = require("../../src/node-types.json");
|
||||
} catch (_) {}
|
||||
@@ -0,0 +1,36 @@
|
||||
# tree-sitter-python
|
||||
|
||||
This crate provides a Python grammar for the [tree-sitter][] parsing library.
|
||||
To use this crate, add it to the `[dependencies]` section of your `Cargo.toml`
|
||||
file. (Note that you will probably also need to depend on the
|
||||
[`tree-sitter`][tree-sitter crate] crate to use the parsed result in any useful
|
||||
way.)
|
||||
|
||||
``` toml
|
||||
[dependencies]
|
||||
tree-sitter = "0.17"
|
||||
tree-sitter-python = "0.17"
|
||||
```
|
||||
|
||||
Typically, you will use the [language][language func] function to add this
|
||||
grammar to a tree-sitter [Parser][], and then use the parser to parse some code:
|
||||
|
||||
``` rust
|
||||
let code = r#"
|
||||
def double(x):
|
||||
return x * 2
|
||||
"#;
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(tree_sitter_python::language()).expect("Error loading Python grammar");
|
||||
let parsed = parser.parse(code, None);
|
||||
```
|
||||
|
||||
If you have any questions, please reach out to us in the [tree-sitter
|
||||
discussions] page.
|
||||
|
||||
[Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
|
||||
[language func]: https://docs.rs/tree-sitter-python/*/tree_sitter_python/fn.language.html
|
||||
[Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
|
||||
[tree-sitter]: https://tree-sitter.github.io/
|
||||
[tree-sitter crate]: https://crates.io/crates/tree-sitter
|
||||
[tree-sitter discussions]: https://github.com/tree-sitter/tree-sitter/discussions
|
||||
@@ -0,0 +1,28 @@
|
||||
use std::path::Path;
|
||||
extern crate cc;
|
||||
|
||||
fn main() {
|
||||
let src_dir = Path::new("src");
|
||||
|
||||
let mut c_config = cc::Build::new();
|
||||
c_config.include(&src_dir);
|
||||
c_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable")
|
||||
.flag_if_supported("-Wno-trigraphs");
|
||||
let parser_path = src_dir.join("parser.c");
|
||||
c_config.file(&parser_path);
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
c_config.compile("parser");
|
||||
|
||||
let mut cpp_config = cc::Build::new();
|
||||
cpp_config.cpp(true);
|
||||
cpp_config.include(&src_dir);
|
||||
cpp_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable");
|
||||
let scanner_path = src_dir.join("scanner.cc");
|
||||
cpp_config.file(&scanner_path);
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
cpp_config.compile("scanner");
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
// -*- coding: utf-8 -*-
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Copyright © 2020, tree-sitter-python authors.
|
||||
// See the LICENSE file in this repo for license details.
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
//! This crate provides a Python grammar for the [tree-sitter][] parsing library.
|
||||
//!
|
||||
//! Typically, you will use the [language][language func] function to add this grammar to a
|
||||
//! tree-sitter [Parser][], and then use the parser to parse some code:
|
||||
//!
|
||||
//! ```
|
||||
//! use tree_sitter::Parser;
|
||||
//!
|
||||
//! let code = r#"
|
||||
//! def double(x):
|
||||
//! return x * 2
|
||||
//! "#;
|
||||
//! let mut parser = Parser::new();
|
||||
//! parser.set_language(tree_sitter_python::language()).expect("Error loading Python grammar");
|
||||
//! let parsed = parser.parse(code, None);
|
||||
//! # let parsed = parsed.unwrap();
|
||||
//! # let root = parsed.root_node();
|
||||
//! # assert!(!root.has_error());
|
||||
//! ```
|
||||
//!
|
||||
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
|
||||
//! [language func]: fn.language.html
|
||||
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
|
||||
//! [tree-sitter]: https://tree-sitter.github.io/
|
||||
|
||||
use tree_sitter::Language;
|
||||
|
||||
extern "C" {
|
||||
fn tree_sitter_python() -> Language;
|
||||
}
|
||||
|
||||
/// Returns the tree-sitter [Language][] for this grammar.
|
||||
///
|
||||
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
|
||||
pub fn language() -> Language {
|
||||
unsafe { tree_sitter_python() }
|
||||
}
|
||||
|
||||
/// The source of the Python tree-sitter grammar description.
|
||||
pub const GRAMMAR: &'static str = include_str!("../../grammar.js");
|
||||
|
||||
/// The syntax highlighting query for this language.
|
||||
pub const HIGHLIGHT_QUERY: &'static str = include_str!("../../queries/highlights.scm");
|
||||
|
||||
/// The content of the [`node-types.json`][] file for this grammar.
|
||||
///
|
||||
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
|
||||
pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json");
|
||||
|
||||
/// The symbol tagging query for this language.
|
||||
pub const TAGGING_QUERY: &'static str = include_str!("../../queries/tags.scm");
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn can_load_grammar() {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
parser
|
||||
.set_language(super::language())
|
||||
.expect("Error loading Python grammar");
|
||||
}
|
||||
}
|
||||
1230
python/extractor/tsg-python/tree-sitter-python/grammar.js
Normal file
1230
python/extractor/tsg-python/tree-sitter-python/grammar.js
Normal file
File diff suppressed because it is too large
Load Diff
1687
python/extractor/tsg-python/tree-sitter-python/log.html
Normal file
1687
python/extractor/tsg-python/tree-sitter-python/log.html
Normal file
File diff suppressed because it is too large
Load Diff
33
python/extractor/tsg-python/tree-sitter-python/package.json
Normal file
33
python/extractor/tsg-python/tree-sitter-python/package.json
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"name": "tree-sitter-python",
|
||||
"version": "0.19.0",
|
||||
"description": "Python grammar for tree-sitter",
|
||||
"main": "bindings/node",
|
||||
"keywords": [
|
||||
"parser",
|
||||
"lexer"
|
||||
],
|
||||
"author": "Max Brunsfeld",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"nan": "^2.14.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"tree-sitter-cli": "^0.19.3"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tree-sitter generate && node-gyp build",
|
||||
"test": "tree-sitter test && script/parse-examples",
|
||||
"parse": "tree-sitter parse",
|
||||
"test-windows": "tree-sitter test"
|
||||
},
|
||||
"repository": "https://github.com/tree-sitter/tree-sitter-python",
|
||||
"tree-sitter": [
|
||||
{
|
||||
"scope": "source.python",
|
||||
"file-types": [
|
||||
"py"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,124 @@
|
||||
; Identifier naming conventions
|
||||
|
||||
((identifier) @constructor
|
||||
(#match? @constructor "^[A-Z]"))
|
||||
|
||||
((identifier) @constant
|
||||
(#match? @constant "^[A-Z][A-Z_]*$"))
|
||||
|
||||
; Builtin functions
|
||||
|
||||
((call
|
||||
function: (identifier) @function.builtin)
|
||||
(#match?
|
||||
@function.builtin
|
||||
"^(abs|all|any|ascii|bin|bool|breakpoint|bytearray|bytes|callable|chr|classmethod|compile|complex|delattr|dict|dir|divmod|enumerate|eval|exec|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|isinstance|issubclass|iter|len|list|locals|map|max|memoryview|min|next|object|oct|open|ord|pow|print|property|range|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|vars|zip|__import__)$"))
|
||||
|
||||
; Function calls
|
||||
|
||||
(decorator) @function
|
||||
|
||||
(call
|
||||
function: (attribute attribute: (identifier) @function.method))
|
||||
(call
|
||||
function: (identifier) @function)
|
||||
|
||||
; Function definitions
|
||||
|
||||
(function_definition
|
||||
name: (identifier) @function)
|
||||
|
||||
(identifier) @variable
|
||||
(attribute attribute: (identifier) @property)
|
||||
(type (identifier) @type)
|
||||
|
||||
; Literals
|
||||
|
||||
[
|
||||
(none)
|
||||
(true)
|
||||
(false)
|
||||
] @constant.builtin
|
||||
|
||||
[
|
||||
(integer)
|
||||
(float)
|
||||
] @number
|
||||
|
||||
(comment) @comment
|
||||
(string) @string
|
||||
(escape_sequence) @escape
|
||||
|
||||
(interpolation
|
||||
"{" @punctuation.special
|
||||
"}" @punctuation.special) @embedded
|
||||
|
||||
[
|
||||
"-"
|
||||
"-="
|
||||
"!="
|
||||
"*"
|
||||
"**"
|
||||
"**="
|
||||
"*="
|
||||
"/"
|
||||
"//"
|
||||
"//="
|
||||
"/="
|
||||
"&"
|
||||
"%"
|
||||
"%="
|
||||
"^"
|
||||
"+"
|
||||
"->"
|
||||
"+="
|
||||
"<"
|
||||
"<<"
|
||||
"<="
|
||||
"<>"
|
||||
"="
|
||||
":="
|
||||
"=="
|
||||
">"
|
||||
">="
|
||||
">>"
|
||||
"|"
|
||||
"~"
|
||||
"and"
|
||||
"in"
|
||||
"is"
|
||||
"not"
|
||||
"or"
|
||||
] @operator
|
||||
|
||||
[
|
||||
"as"
|
||||
"assert"
|
||||
"async"
|
||||
"await"
|
||||
"break"
|
||||
"class"
|
||||
"continue"
|
||||
"def"
|
||||
"del"
|
||||
"elif"
|
||||
"else"
|
||||
"except"
|
||||
"exec"
|
||||
"finally"
|
||||
"for"
|
||||
"from"
|
||||
"global"
|
||||
"if"
|
||||
"import"
|
||||
"lambda"
|
||||
"nonlocal"
|
||||
"pass"
|
||||
"print"
|
||||
"raise"
|
||||
"return"
|
||||
"try"
|
||||
"while"
|
||||
"with"
|
||||
"yield"
|
||||
] @keyword
|
||||
@@ -0,0 +1,12 @@
|
||||
(class_definition
|
||||
name: (identifier) @name) @definition.class
|
||||
|
||||
(function_definition
|
||||
name: (identifier) @name) @definition.function
|
||||
|
||||
(call
|
||||
function: [
|
||||
(identifier) @name
|
||||
(attribute
|
||||
attribute: (identifier) @name)
|
||||
]) @reference.call
|
||||
6615
python/extractor/tsg-python/tree-sitter-python/src/grammar.json
Normal file
6615
python/extractor/tsg-python/tree-sitter-python/src/grammar.json
Normal file
File diff suppressed because it is too large
Load Diff
4064
python/extractor/tsg-python/tree-sitter-python/src/node-types.json
Normal file
4064
python/extractor/tsg-python/tree-sitter-python/src/node-types.json
Normal file
File diff suppressed because it is too large
Load Diff
76504
python/extractor/tsg-python/tree-sitter-python/src/parser.c
Normal file
76504
python/extractor/tsg-python/tree-sitter-python/src/parser.c
Normal file
File diff suppressed because it is too large
Load Diff
402
python/extractor/tsg-python/tree-sitter-python/src/scanner.cc
Normal file
402
python/extractor/tsg-python/tree-sitter-python/src/scanner.cc
Normal file
@@ -0,0 +1,402 @@
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <cwctype>
|
||||
#include <stdio.h>
|
||||
#include <tree_sitter/parser.h>
|
||||
#include <vector>
|
||||
namespace {
|
||||
|
||||
using std::vector;
|
||||
using std::iswspace;
|
||||
using std::memcpy;
|
||||
|
||||
enum TokenType {
|
||||
NEWLINE,
|
||||
INDENT,
|
||||
DEDENT,
|
||||
STRING_START,
|
||||
STRING_CONTENT,
|
||||
STRING_END,
|
||||
};
|
||||
|
||||
struct Delimiter {
|
||||
enum {
|
||||
SingleQuote = 1 << 0,
|
||||
DoubleQuote = 1 << 1,
|
||||
BackQuote = 1 << 2,
|
||||
Raw = 1 << 3,
|
||||
Format = 1 << 4,
|
||||
Triple = 1 << 5,
|
||||
Bytes = 1 << 6,
|
||||
};
|
||||
|
||||
Delimiter() : flags(0) {}
|
||||
|
||||
bool is_format() const {
|
||||
return flags & Format;
|
||||
}
|
||||
|
||||
bool is_raw() const {
|
||||
return flags & Raw;
|
||||
}
|
||||
|
||||
bool is_triple() const {
|
||||
return flags & Triple;
|
||||
}
|
||||
|
||||
bool is_bytes() const {
|
||||
return flags & Bytes;
|
||||
}
|
||||
|
||||
int32_t end_character() const {
|
||||
if (flags & SingleQuote) return '\'';
|
||||
if (flags & DoubleQuote) return '"';
|
||||
if (flags & BackQuote) return '`';
|
||||
return 0;
|
||||
}
|
||||
|
||||
void set_format() {
|
||||
flags |= Format;
|
||||
}
|
||||
|
||||
void set_raw() {
|
||||
flags |= Raw;
|
||||
}
|
||||
|
||||
void set_triple() {
|
||||
flags |= Triple;
|
||||
}
|
||||
|
||||
void set_bytes() {
|
||||
flags |= Bytes;
|
||||
}
|
||||
|
||||
void set_end_character(int32_t character) {
|
||||
switch (character) {
|
||||
case '\'':
|
||||
flags |= SingleQuote;
|
||||
break;
|
||||
case '"':
|
||||
flags |= DoubleQuote;
|
||||
break;
|
||||
case '`':
|
||||
flags |= BackQuote;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
char flags;
|
||||
};
|
||||
|
||||
struct Scanner {
|
||||
Scanner() {
|
||||
assert(sizeof(Delimiter) == sizeof(char));
|
||||
deserialize(NULL, 0);
|
||||
}
|
||||
|
||||
unsigned serialize(char *buffer) {
|
||||
size_t i = 0;
|
||||
|
||||
size_t delimiter_count = delimiter_stack.size();
|
||||
if (delimiter_count > UINT8_MAX) delimiter_count = UINT8_MAX;
|
||||
buffer[i++] = delimiter_count;
|
||||
|
||||
if (delimiter_count > 0) {
|
||||
memcpy(&buffer[i], delimiter_stack.data(), delimiter_count);
|
||||
}
|
||||
i += delimiter_count;
|
||||
|
||||
vector<uint16_t>::iterator
|
||||
iter = indent_length_stack.begin() + 1,
|
||||
end = indent_length_stack.end();
|
||||
|
||||
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
||||
buffer[i++] = *iter;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
void deserialize(const char *buffer, unsigned length) {
|
||||
delimiter_stack.clear();
|
||||
indent_length_stack.clear();
|
||||
indent_length_stack.push_back(0);
|
||||
|
||||
if (length > 0) {
|
||||
size_t i = 0;
|
||||
|
||||
size_t delimiter_count = (uint8_t)buffer[i++];
|
||||
delimiter_stack.resize(delimiter_count);
|
||||
if (delimiter_count > 0) {
|
||||
memcpy(delimiter_stack.data(), &buffer[i], delimiter_count);
|
||||
}
|
||||
i += delimiter_count;
|
||||
|
||||
for (; i < length; i++) {
|
||||
indent_length_stack.push_back(buffer[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void advance(TSLexer *lexer) {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
void skip(TSLexer *lexer) {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
||||
if (valid_symbols[STRING_CONTENT] && !valid_symbols[INDENT] && !delimiter_stack.empty()) {
|
||||
Delimiter delimiter = delimiter_stack.back();
|
||||
int32_t end_character = delimiter.end_character();
|
||||
bool has_content = false;
|
||||
while (lexer->lookahead) {
|
||||
if ((lexer->lookahead == '{' || lexer->lookahead == '}') && delimiter.is_format()) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return has_content;
|
||||
} else if (lexer->lookahead == '\\') {
|
||||
if (delimiter.is_raw()) {
|
||||
lexer->advance(lexer, false);
|
||||
continue;
|
||||
} else if (delimiter.is_bytes()) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == 'N' || lexer->lookahead == 'u' || lexer->lookahead == 'U') {
|
||||
// In bytes string, \N{...}, \uXXXX and \UXXXXXXXX are not escape sequences
|
||||
// https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
|
||||
lexer->advance(lexer, false);
|
||||
} else {
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
} else {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return has_content;
|
||||
}
|
||||
} else if (lexer->lookahead == end_character) {
|
||||
if (delimiter.is_triple()) {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == end_character) {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == end_character) {
|
||||
if (has_content) {
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
} else {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->mark_end(lexer);
|
||||
delimiter_stack.pop_back();
|
||||
lexer->result_symbol = STRING_END;
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
lexer->mark_end(lexer);
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (has_content) {
|
||||
lexer->result_symbol = STRING_CONTENT;
|
||||
} else {
|
||||
lexer->advance(lexer, false);
|
||||
delimiter_stack.pop_back();
|
||||
lexer->result_symbol = STRING_END;
|
||||
}
|
||||
lexer->mark_end(lexer);
|
||||
return true;
|
||||
}
|
||||
} else if (lexer->lookahead == '\n' && has_content && !delimiter.is_triple()) {
|
||||
return false;
|
||||
}
|
||||
advance(lexer);
|
||||
has_content = true;
|
||||
}
|
||||
}
|
||||
|
||||
lexer->mark_end(lexer);
|
||||
|
||||
bool found_end_of_line = false;
|
||||
uint32_t indent_length = 0;
|
||||
int32_t first_comment_indent_length = -1;
|
||||
for (;;) {
|
||||
if (lexer->lookahead == '\n') {
|
||||
found_end_of_line = true;
|
||||
indent_length = 0;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == ' ') {
|
||||
indent_length++;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == '\r') {
|
||||
indent_length = 0;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == '\t') {
|
||||
indent_length += 8;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == '#') {
|
||||
if (first_comment_indent_length == -1) {
|
||||
first_comment_indent_length = (int32_t)indent_length;
|
||||
}
|
||||
while (lexer->lookahead && lexer->lookahead != '\n') {
|
||||
skip(lexer);
|
||||
}
|
||||
skip(lexer);
|
||||
indent_length = 0;
|
||||
} else if (lexer->lookahead == '\\') {
|
||||
skip(lexer);
|
||||
if (lexer->lookahead == '\r') {
|
||||
skip(lexer);
|
||||
}
|
||||
if (lexer->lookahead == '\n') {
|
||||
skip(lexer);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else if (lexer->lookahead == '\f') {
|
||||
indent_length = 0;
|
||||
skip(lexer);
|
||||
} else if (lexer->lookahead == 0) {
|
||||
indent_length = 0;
|
||||
found_end_of_line = true;
|
||||
break;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_end_of_line) {
|
||||
if (!indent_length_stack.empty()) {
|
||||
uint16_t current_indent_length = indent_length_stack.back();
|
||||
|
||||
if (
|
||||
valid_symbols[INDENT] &&
|
||||
indent_length > current_indent_length
|
||||
) {
|
||||
indent_length_stack.push_back(indent_length);
|
||||
lexer->result_symbol = INDENT;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (
|
||||
valid_symbols[DEDENT] &&
|
||||
indent_length < current_indent_length &&
|
||||
|
||||
// Wait to create a dedent token until we've consumed any comments
|
||||
// whose indentation matches the current block.
|
||||
first_comment_indent_length < (int32_t)current_indent_length
|
||||
) {
|
||||
indent_length_stack.pop_back();
|
||||
lexer->result_symbol = DEDENT;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid_symbols[NEWLINE]) {
|
||||
lexer->result_symbol = NEWLINE;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) {
|
||||
Delimiter delimiter;
|
||||
|
||||
bool has_flags = false;
|
||||
while (lexer->lookahead) {
|
||||
if (lexer->lookahead == 'f' || lexer->lookahead == 'F') {
|
||||
delimiter.set_format();
|
||||
} else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') {
|
||||
delimiter.set_raw();
|
||||
} else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
|
||||
delimiter.set_bytes();
|
||||
} else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') {
|
||||
break;
|
||||
}
|
||||
has_flags = true;
|
||||
advance(lexer);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '`') {
|
||||
delimiter.set_end_character('`');
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
} else if (lexer->lookahead == '\'') {
|
||||
delimiter.set_end_character('\'');
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
if (lexer->lookahead == '\'') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '\'') {
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
delimiter.set_triple();
|
||||
}
|
||||
}
|
||||
} else if (lexer->lookahead == '"') {
|
||||
delimiter.set_end_character('"');
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
if (lexer->lookahead == '"') {
|
||||
advance(lexer);
|
||||
if (lexer->lookahead == '"') {
|
||||
advance(lexer);
|
||||
lexer->mark_end(lexer);
|
||||
delimiter.set_triple();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (delimiter.end_character()) {
|
||||
delimiter_stack.push_back(delimiter);
|
||||
lexer->result_symbol = STRING_START;
|
||||
return true;
|
||||
} else if (has_flags) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<uint16_t> indent_length_stack;
|
||||
vector<Delimiter> delimiter_stack;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
void *tree_sitter_python_external_scanner_create() {
|
||||
return new Scanner();
|
||||
}
|
||||
|
||||
bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer,
|
||||
const bool *valid_symbols) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->scan(lexer, valid_symbols);
|
||||
}
|
||||
|
||||
unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buffer) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->serialize(buffer);
|
||||
}
|
||||
|
||||
void tree_sitter_python_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
scanner->deserialize(buffer, length);
|
||||
}
|
||||
|
||||
void tree_sitter_python_external_scanner_destroy(void *payload) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
delete scanner;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,224 @@
|
||||
// Vendored tree-sitter parser header: the ABI shared between generated
// parsers / external scanners and the tree-sitter runtime.  Field order and
// types below are a binary contract — do not reorder or retype.
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Fixed size of the buffer handed to external-scanner serialize hooks.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

typedef uint16_t TSStateId;

// These are also declared by the public API header; only define them when
// that header has not been included first.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif

// One entry of a node's field map: which field a child occupies.
typedef struct {
  TSFieldId field_id;
  uint8_t child_index;
  bool inherited;
} TSFieldMapEntry;

// Index/length slice into the flat field_map_entries array.
typedef struct {
  uint16_t index;
  uint16_t length;
} TSFieldMapSlice;

// Per-symbol display metadata.
typedef struct {
  bool visible;
  bool named;
  bool supertype;
} TSSymbolMetadata;

typedef struct TSLexer TSLexer;

// Lexer interface handed to lex functions and external scanners.
struct TSLexer {
  int32_t lookahead;                                  // current lookahead character (or 0 at EOF)
  TSSymbol result_symbol;                             // set by the scanner to report the matched token
  void (*advance)(TSLexer *, bool);                   // consume lookahead; true = skip (exclude from token)
  void (*mark_end)(TSLexer *);                        // mark current position as the token's end
  uint32_t (*get_column)(TSLexer *);
  bool (*is_at_included_range_start)(const TSLexer *);
  bool (*eof)(const TSLexer *);
};

typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;

// A single LR parse action; `type` discriminates the union.
typedef union {
  struct {
    uint8_t type;
    TSStateId state;
    bool extra;
    bool repetition;
  } shift;
  struct {
    uint8_t type;
    uint8_t child_count;
    TSSymbol symbol;
    int16_t dynamic_precedence;
    uint16_t production_id;
  } reduce;
  uint8_t type;
} TSParseAction;

// Which internal and external lex states apply in a given parse state.
typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;

// Parse-table cell: either an action, or a header entry giving the count of
// actions that follow.
typedef union {
  TSParseAction action;
  struct {
    uint8_t count;
    bool reusable;
  } entry;
} TSParseActionEntry;

// A generated language: all tables and hooks the runtime needs to parse it.
struct TSLanguage {
  uint32_t version;
  uint32_t symbol_count;
  uint32_t alias_count;
  uint32_t token_count;
  uint32_t external_token_count;
  uint32_t state_count;
  uint32_t large_state_count;
  uint32_t production_id_count;
  uint32_t field_count;
  uint16_t max_alias_sequence_length;
  const uint16_t *parse_table;
  const uint16_t *small_parse_table;
  const uint32_t *small_parse_table_map;
  const TSParseActionEntry *parse_actions;
  const char * const *symbol_names;
  const char * const *field_names;
  const TSFieldMapSlice *field_map_slices;
  const TSFieldMapEntry *field_map_entries;
  const TSSymbolMetadata *symbol_metadata;
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;
  // External-scanner hooks (matches the extern "C" functions in scanner.cc).
  struct {
    const bool *states;
    const TSSymbol *symbol_map;
    void *(*create)(void);
    void (*destroy)(void *);
    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
    unsigned (*serialize)(void *, char *);
    void (*deserialize)(void *, const char *, unsigned);
  } external_scanner;
  const TSStateId *primary_state_ids;
};

/*
 * Lexer Macros
 */

// Sets up the goto-driven lex loop used by generated lex functions.
#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;

// Consume the lookahead and jump to `state_value`.
#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }

// Like ADVANCE, but the character is excluded from the token.
#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }

// Record a successful match of `symbol_value` ending here.
#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

#define END_STATE() return result;

/*
 * Parse Table Macros
 */

#define SMALL_STATE(id) id - LARGE_STATE_COUNT

#define STATE(id) id

#define ACTIONS(id) id

#define SHIFT(state_value)            \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = state_value            \
    }                                 \
  }}

#define SHIFT_REPEAT(state_value)     \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = state_value,           \
      .repetition = true              \
    }                                 \
  }}

#define SHIFT_EXTRA()                 \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .extra = true                   \
    }                                 \
  }}

#define REDUCE(symbol_val, child_count_val, ...) \
  {{                                             \
    .reduce = {                                  \
      .type = TSParseActionTypeReduce,           \
      .symbol = symbol_val,                      \
      .child_count = child_count_val,            \
      __VA_ARGS__                                \
    },                                           \
  }}

#define RECOVER()                    \
  {{                                 \
    .type = TSParseActionTypeRecover \
  }}

#define ACCEPT_INPUT()              \
  {{                                \
    .type = TSParseActionTypeAccept \
  }}

#ifdef __cplusplus
}
#endif

#endif  // TREE_SITTER_PARSER_H_
|
||||
60
python/extractor/tsg-python/tsg_to_dot.py
Normal file
60
python/extractor/tsg-python/tsg_to_dot.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# Convert output of tree-sitter-graph to dot format.
#
# Usage: python tsg_to_dot.py INPUT OUTPUT
#
# The input is the plain-text dump produced by tree-sitter-graph:
# "node <id>" / "edge <from> -> <to>" lines each open a statement, followed
# by indented "<key>: <value>" property lines that become the statement's
# label.  A property whose value is "[graph node <n>]" additionally becomes
# a labelled edge from the current node to node <n>.

import sys
import re

# regular expression to match a node
node_re = re.compile(r"node (?P<id>\d+)")

# regular expression to match an edge
edge_re = re.compile(r"edge (?P<from>\d+) -> (?P<to>\d+)")

# regular expression to match a property
prop_re = re.compile(r"\s+(?P<key>\w+): (?P<value>.*)")

# regular expression to match a link: "[graph node n]"
link_re = re.compile(r"\[graph node (?P<id>\d+)\]")


def _flush(out, label, node_id, links):
    """Close the currently open dot statement: write its accumulated label,
    then one labelled edge per node-valued property."""
    out.write('\\n'.join(label) + "\"];\n")
    for key, target in links.items():
        out.write("{} -> {} [label=\"{}\"];\n".format(node_id, target, key))


with open(sys.argv[1], 'r') as f, open(sys.argv[2], 'w') as out:
    out.write("digraph G {\n")
    label = []      # property strings of the statement being built
    inside = False  # True while a node/edge statement is open (unterminated)
    node_id = 0     # id of the current node (edges keep the placeholder 0)
    links = {}      # property key -> referenced node id, emitted as edges
    for line in f:
        m = node_re.match(line)
        if m:
            if inside:
                _flush(out, label, node_id, links)
            out.write("{id} [label=\"".format(**m.groupdict()))
            label = ["id={id}".format(**m.groupdict())]
            inside = True
            node_id = m.group('id')
            links = {}
            continue

        m = edge_re.match(line)
        if m:
            if inside:
                _flush(out, label, node_id, links)
            out.write("{from} -> {to} [label=\"".format(**m.groupdict()))
            label = []
            inside = True
            node_id = 0
            links = {}
            continue

        m = prop_re.match(line)
        if m:
            # Escape quotes in the value; backslash-quote sequences already
            # present are dropped so the escaping does not double them up.
            label.append("{key}={value}".format(**m.groupdict())
                         .replace('"', '\\"').replace('\\\\"', ''))
            l = link_re.match(m.group('value'))
            if l:
                links[m.group('key')] = l.group('id')
    # Close the last statement.  Guarded by `inside`: the original wrote a
    # stray '"];' line even when the input contained no nodes or edges.
    if inside:
        _flush(out, label, node_id, links)
    out.write("}\n")
|
||||
Reference in New Issue
Block a user