Bazel: remove non-working fake tree-sitter-extractor workaround

The `.cargo/config.toml` override-based workaround wasn't really
working: while `cargo build|check` honored the override, `cargo metadata`
did not, resulting in a completely broken IDE experience.

For the moment, we just use a unified workspace `Cargo.toml` for all
extractors using the shared tree-sitter code, which has the downside of
making bazel pull in dependencies for all of them, and not being able to
do sparse checkouts for them. We should investigate and revisit this in
the future.
This commit is contained in:
Paolo Tranquilli
2024-09-11 08:17:11 +02:00
parent 4f90f5fb4c
commit f8c9d96882
21 changed files with 80 additions and 125 deletions

3
.gitignore vendored
View File

@@ -68,3 +68,6 @@ node_modules/
# Jetbrains IDE files
.idea
# cargo build directory
/target

View File

@@ -92,9 +92,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.86"
version = "1.0.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8"
[[package]]
name = "arrayvec"
@@ -153,9 +153,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "bytemuck"
version = "1.17.1"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773d90827bc3feecfb67fab12e24de0749aad83c74b9504ecde46237b5cd24e2"
checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae"
[[package]]
name = "byteorder"
@@ -197,9 +197,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.1.16"
version = "1.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9d013ecb737093c0e86b151a7b837993cf9ec6c502946cfb44bedc392421e0b"
checksum = "b62ac837cdb5cb22e10a256099b4fc502b1dfe560cb282963a974d7abd80e476"
dependencies = [
"shlex",
]
@@ -338,6 +338,23 @@ dependencies = [
"tree-sitter-ql",
]
[[package]]
name = "codeql-extractor-ruby"
version = "0.1.0"
dependencies = [
"clap",
"codeql-extractor",
"encoding",
"lazy_static",
"rayon",
"regex",
"tracing",
"tracing-subscriber",
"tree-sitter",
"tree-sitter-embedded-template",
"tree-sitter-ruby",
]
[[package]]
name = "codeql-rust"
version = "0.1.0"
@@ -654,9 +671,9 @@ dependencies = [
[[package]]
name = "globset"
version = "0.4.14"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1"
checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19"
dependencies = [
"aho-corasick",
"bstr",
@@ -1911,18 +1928,18 @@ dependencies = [
[[package]]
name = "serde"
version = "1.0.209"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.209"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
dependencies = [
"proc-macro2",
"quote",
@@ -2203,6 +2220,15 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-embedded-template"
version = "0.23.0"
source = "git+https://github.com/tree-sitter/tree-sitter-embedded-template.git?rev=62b0a6e45900a7dff7c37da95fec20a09968ba52#62b0a6e45900a7dff7c37da95fec20a09968ba52"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-json"
version = "0.23.0"
@@ -2227,6 +2253,15 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-ruby"
version = "0.23.0"
source = "git+https://github.com/tree-sitter/tree-sitter-ruby.git?rev=a66579f70d6f50ffd81a16fc3d3358e2ac173c88#a66579f70d6f50ffd81a16fc3d3358e2ac173c88"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "triomphe"
version = "0.1.13"
@@ -2254,9 +2289,9 @@ dependencies = [
[[package]]
name = "unicode-ident"
version = "1.0.12"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
name = "unicode-properties"

14
Cargo.toml Normal file
View File

@@ -0,0 +1,14 @@
# This is the shared workspace file for extractors using shared/tree-sitter-extractor
[workspace]
resolver = "2"
members = [
"shared/tree-sitter-extractor",
"ruby/extractor",
"rust/extractor",
]
[patch.crates-io]
# patch for build script bug preventing bazel build
# see https://github.com/rust-lang/rustc_apfloat/pull/17
rustc_apfloat = { git = "https://github.com/redsun82/rustc_apfloat.git", rev = "096d585100636bc2e9f09d7eefec38c5b334d47b" }

View File

@@ -47,35 +47,23 @@ cp.from_cargo(
)
use_repo(cp, "py_deps")
# crate_ruby, but shortened due to windows file paths
# deps for ruby+rust, but shortened due to windows file paths
r = use_extension(
"@rules_rust//crate_universe:extension.bzl",
"crate",
isolate = True,
)
r.from_cargo(
name = "rd",
cargo_lockfile = "//ruby/extractor:Cargo.lock",
name = "r",
cargo_lockfile = "//:Cargo.lock",
manifests = [
"//:Cargo.toml",
"//ruby/extractor:Cargo.toml",
"//ruby/extractor/codeql-extractor-fake-crate:Cargo.toml",
],
)
use_repo(r, ruby_deps = "rd")
rsp = use_extension(
"@rules_rust//crate_universe:extension.bzl",
"crate",
isolate = True,
)
rsp.from_cargo(
name = "rs_deps",
cargo_lockfile = "//rust/extractor:Cargo.lock",
manifests = [
"//rust/extractor:Cargo.toml",
"//shared/tree-sitter-extractor:Cargo.toml",
],
)
use_repo(rsp, rust_deps = "rs_deps")
use_repo(r, tree_sitter_extractors_deps = "r")
dotnet = use_extension("@rules_dotnet//dotnet:extensions.bzl", "dotnet")
dotnet.toolchain(dotnet_version = "8.0.101")

View File

@@ -355,10 +355,5 @@
"Python model summaries test extension": [
"python/ql/test/library-tests/dataflow/model-summaries/InlineTaintTest.ext.yml",
"python/ql/test/library-tests/dataflow/model-summaries/NormalDataflowTest.ext.yml"
],
"shared tree-sitter extractor cargo.toml": [
"shared/tree-sitter-extractor/Cargo.toml",
"ruby/extractor/codeql-extractor-fake-crate/Cargo.toml",
"rust/extractor/codeql-extractor-fake-crate/Cargo.toml"
]
}

View File

@@ -1 +0,0 @@
paths = ["../../shared/tree-sitter-extractor"]

View File

@@ -1,4 +1,4 @@
load("@ruby_deps//:defs.bzl", "aliases", "all_crate_deps")
load("@tree_sitter_extractors_deps//:defs.bzl", "aliases", "all_crate_deps")
load("//misc/bazel:rust.bzl", "codeql_rust_binary")
codeql_rust_binary(

Binary file not shown.

View File

@@ -1,4 +1,3 @@
[workspace]
[package]
name = "codeql-extractor-ruby"
description = "CodeQL Ruby extractor"
@@ -16,8 +15,6 @@ tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }
rayon = "1.5.0"
regex = "1.7.1"
encoding = "0.2"
lazy_static = "1.4.0"
# Ideally, we'd like to pull this in via a relative path.
# However, our bazel/rust tooling chokes on this, c.f. https://github.com/bazelbuild/rules_rust/issues/1525
# Therefore, we have a pretty bad hack in place instead, see README.md in the codeql-extractor-fake-crate directory.
codeql-extractor = { path = "codeql-extractor-fake-crate" }
lazy_static = "1.4.0"
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }

View File

@@ -1,26 +0,0 @@
[package]
name = "codeql-extractor"
version = "0.2.0"
edition = "2021"
authors = ["GitHub"]
[dependencies]
flate2 = "1.0"
globset = "0.4"
tree-sitter = ">= 0.23.0"
tracing = "0.1"
tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }
rayon = "1.5.0"
regex = "1.7.1"
encoding = "0.2"
lazy_static = "1.4.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = { version = "0.4.19", features = ["serde"] }
num_cpus = "1.14.0"
[dev-dependencies]
tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql" }
tree-sitter-json = {git = "https://github.com/tree-sitter/tree-sitter-json" }
rand = "0.8.5"

View File

@@ -1,7 +0,0 @@
We're presenting a fake crate in this workspace that ensures that the correct crate dependencies from the shared tree sitter
extractor can be parsed by Bazel (which doesn't resolve path dependencies outside of the cargo workspace unfortunately).
The sync-identical-files script keeps this up-to-date.
For local development and IDEs, we override the path to `codeql-extractor` using the `.cargo/config.toml` mechanism.
Bazel doesn't actually do anything with path dependencies except to pull in their dependency tree, so we manually
specify the dependency from the ruby extractor to the shared extractor in `BUILD.bazel`.

View File

@@ -1 +0,0 @@
paths = ["../../shared/tree-sitter-extractor"]

View File

@@ -1,4 +1,4 @@
load("@rust_deps//:defs.bzl", "aliases", "all_crate_deps")
load("@tree_sitter_extractors_deps//:defs.bzl", "aliases", "all_crate_deps")
load("//misc/bazel:rust.bzl", "codeql_rust_binary")
codeql_rust_binary(

View File

@@ -1,5 +1,3 @@
[workspace]
[package]
name = "codeql-rust"
version = "0.1.0"
@@ -27,9 +25,4 @@ triomphe = "0.1.13"
# Ideally, we'd like to pull this in via a relative path.
# However, our bazel/rust tooling chokes on this, c.f. https://github.com/bazelbuild/rules_rust/issues/1525
# Therefore, we have a pretty bad hack in place instead, see README.md in the codeql-extractor-fake-crate directory.
codeql-extractor = { path = "codeql-extractor-fake-crate" }
[patch.crates-io]
# patch for build script bug preventing bazel build
# see https://github.com/rust-lang/rustc_apfloat/pull/17
rustc_apfloat = { git = "https://github.com/redsun82/rustc_apfloat.git", rev = "096d585100636bc2e9f09d7eefec38c5b334d47b" }
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }

View File

@@ -1,26 +0,0 @@
[package]
name = "codeql-extractor"
version = "0.2.0"
edition = "2021"
authors = ["GitHub"]
[dependencies]
flate2 = "1.0"
globset = "0.4"
tree-sitter = ">= 0.23.0"
tracing = "0.1"
tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }
rayon = "1.5.0"
regex = "1.7.1"
encoding = "0.2"
lazy_static = "1.4.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = { version = "0.4.19", features = ["serde"] }
num_cpus = "1.14.0"
[dev-dependencies]
tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql" }
tree-sitter-json = {git = "https://github.com/tree-sitter/tree-sitter-json" }
rand = "0.8.5"

View File

@@ -1,7 +0,0 @@
We're presenting a fake crate in this workspace that ensures that the correct crate dependencies from the shared tree sitter
extractor can be parsed by Bazel (which doesn't resolve path dependencies outside of the cargo workspace unfortunately).
The sync-identical-files script keeps this up-to-date.
For local development and IDEs, we override the path to `codeql-extractor` using the `.cargo/config.toml` mechanism.
Bazel doesn't actually do anything with path dependencies except to pull in their dependency tree, so we manually
specify the dependency from the ruby extractor to the shared extractor in `BUILD.bazel`.

View File

@@ -1,5 +1,5 @@
load("@ruby_deps//:defs.bzl", "aliases", "all_crate_deps")
load("@rules_rust//rust:defs.bzl", "rust_library")
load("@tree_sitter_extractors_deps//:defs.bzl", "aliases", "all_crate_deps")
package(default_visibility = ["//visibility:public"])
@@ -12,5 +12,5 @@ rust_library(
compile_data = [
"src/generator/prefix.dbscheme",
],
deps = all_crate_deps(package_name = "ruby/extractor/codeql-extractor-fake-crate"),
deps = all_crate_deps(),
)