From 68d1cbb2d54928fc68817c08c9bc4ede63110170 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Tue, 7 May 2024 08:38:34 +0200 Subject: [PATCH] Tree-sitter: Bump to 0.22.6 --- python/extractor/tsg-python/Cargo.toml | 2 +- ql/Cargo.lock | Bin 34042 -> 34047 bytes ql/buramu/tree-sitter-blame/Cargo.toml | 2 +- ql/extractor/Cargo.toml | 10 +++--- ql/ql/src/codeql_ql/StructuredLogs.qll | 18 +++++++--- .../src/codeql_ql/ast/internal/TreeSitter.qll | 24 ++++++++----- ql/ql/src/ql.dbscheme | 33 ++++++++++-------- ql/rust-toolchain.toml | 4 +-- ruby/extractor/Cargo.lock | Bin 32436 -> 32214 bytes ruby/extractor/Cargo.toml | 10 +++--- ruby/extractor/rust-toolchain.toml | 2 +- ruby/extractor/src/extractor.rs | 8 ++--- shared/tree-sitter-extractor/Cargo.toml | 4 +-- .../tree-sitter-extractor/rust-toolchain.toml | 2 +- .../src/extractor/mod.rs | 2 +- .../src/extractor/simple.rs | 2 +- 16 files changed, 71 insertions(+), 52 deletions(-) diff --git a/python/extractor/tsg-python/Cargo.toml b/python/extractor/tsg-python/Cargo.toml index ac4d4093189..1266f94f2b2 100644 --- a/python/extractor/tsg-python/Cargo.toml +++ b/python/extractor/tsg-python/Cargo.toml @@ -7,7 +7,7 @@ authors = ["Taus Brock-Nannestad "] edition = "2018" # When changing/updating these, the `Cargo.Bazel.lock` file has to be regenerated. -# Run `CARGO_BAZEL_REPIN=true CARGO_BAZEL_REPIN_ONLY=py_deps ./build --bazel sync --only=py_deps` +# Run `CARGO_BAZEL_REPIN=true CARGO_BAZEL_REPIN_ONLY=py_deps ./tools/bazel sync --only=py_deps` # in the `semmle-code` repository to do so. # For more information, check out the documentation at # https://bazelbuild.github.io/rules_rust/crate_universe.html#repinning--updating-dependencies diff --git a/ql/Cargo.lock b/ql/Cargo.lock index 0db453809c9f47475acc6936e1c091553aa0cbbe..9ee8f4318ea3c590471cfe3df67cc21ad94ac5b7 100644 GIT binary patch delta 503 zcma)&F-x355Qgy#2+2WEFhWRUPAx2s-PxJh-9oQPmqP9JJ9lSxLqrslU~PL~x4$74 zRuS8P_%l-F59Al5iC=1)E8pQ^-iP<^Z18V3`0-&lh}`aW_a{sVBCu6*IS~Sz=z$^D zD2a2eiDOP;ov5uv*&>+g`)kOthOBKlt=<0_e%xDxF}KI1y@^&Yf-y=-s;JWEBCIG` z92O}N#MNYc09zpVRJ4d9S{Aab((Klo_WSU5!M3Yzr#tz2em4H}`P<}6x!j3ZNeFy~ z$b?LhSil(b-^-yYpr#7RkLCs21yQzE(2D XDLAj_B~)t>P04OFCwRYp{A=S03vZTC delta 556 zcma)&yKYlK5Qc@3xEvxO3Lqtlfda|N&d%=aPT?e=bQFXdu-VIOFo~RSY>-l=c9miu zAukXqF8~jK1P?*S6VSlnB5F5Nj6UiA|L^Cb_hZpJ-&yH7u3v|{n<}GXMcNZ+$ZVrv zHjXV~G}agI3}ZwUOti&uAmwuO#b$sI80xL#)r;>dPoG^@>rkQD-4bV9gu>B9Mv#mg ztnmp-5HJj?J1z@$kK{88_(VEEAWGyhV|~1Hr`~*YwSIefc1hL6tA{7y{VYy5=B>zJ zHkr?*9lV_#PS^J)^Q~4s44tt=D8lRtSVu)b9C#;$#)--iNs*w+H=iOdFW6)wD{H@vhSYpy81TD^YrZq$j*k2Z` TpPJkMMn0&6aP8vg_38azCFi5i diff --git a/ql/buramu/tree-sitter-blame/Cargo.toml b/ql/buramu/tree-sitter-blame/Cargo.toml index 95dc42c3e74..963691a7349 100644 --- a/ql/buramu/tree-sitter-blame/Cargo.toml +++ b/ql/buramu/tree-sitter-blame/Cargo.toml @@ -20,7 +20,7 @@ include = [ path = "bindings/rust/lib.rs" [dependencies] -tree-sitter = "~0.20.3" +tree-sitter = ">= 0.22.6" [build-dependencies] cc = "1.0" diff --git a/ql/extractor/Cargo.toml b/ql/extractor/Cargo.toml index 1155cc3c698..af05c7d2086 100644 --- a/ql/extractor/Cargo.toml +++ b/ql/extractor/Cargo.toml @@ -2,16 +2,16 @@ name = "codeql-extractor-ql" version = "0.1.0" authors = ["GitHub"] -edition = "2018" +edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -tree-sitter = ">= 0.20, < 0.21" -tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", rev = "d08db734f8dc52f6bc04db53a966603122bc6985"} -tree-sitter-ql-dbscheme = { git = "https://github.com/erik-krogh/tree-sitter-ql-dbscheme.git", rev = "63e1344353f63931e88bfbc2faa2e78e1421b213"} +tree-sitter = ">= 0.22.6" +tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", rev = "fa5c3821dd2161f5c8528a8cbdb258daa6dc4de6"} +tree-sitter-ql-dbscheme = { git = "https://github.com/tree-sitter/tree-sitter-ql-dbscheme.git", rev = "5f770f57fa415607ff50e3d237d47c8f11440eb3"} tree-sitter-blame = {path = "../buramu/tree-sitter-blame"} -tree-sitter-json = {git = "https://github.com/tausbn/tree-sitter-json.git", rev = "745663ee997f1576fe1e7187e6347e0db36ec7a9"} +tree-sitter-json = {git = "https://github.com/tree-sitter/tree-sitter-json.git", rev = "94f5c527b2965465956c2000ed6134dd24daf2a7"} clap = { version = "4.2", features = ["derive"] } tracing = "0.1" tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } diff --git a/ql/ql/src/codeql_ql/StructuredLogs.qll b/ql/ql/src/codeql_ql/StructuredLogs.qll index 60ea7e014ed..4c0ef69fd55 100644 --- a/ql/ql/src/codeql_ql/StructuredLogs.qll +++ b/ql/ql/src/codeql_ql/StructuredLogs.qll @@ -31,15 +31,25 @@ private Predicate getPredicateFromPosition(string s) { ) } +pragma[nomagic] +private string getJsonStringComponent(JSON::String s, int i) { + result = s.getChild(i).(JSON::Token).getValue() +} + +pragma[nomagic] +private string getJsonString(JSON::String s) { + result = concat(string c, int i | c = getJsonStringComponent(s, i) | c order by i) +} + class Object extends JSON::Object { - JSON::Value getValue(string key) { + JSON::UnderscoreValue getValue(string key) { exists(JSON::Pair p | p = this.getChild(_) | - key = p.getKey().(JSON::String).getChild().getValue() and + key = getJsonString(p.getKey()) and result = p.getValue() ) } - string getString(string key) { result = this.getValue(key).(JSON::String).getChild().getValue() } + string getString(string key) { result = getJsonString(this.getValue(key)) } int getNumber(string key) { result = this.getValue(key).(JSON::Number).getValue().toInt() } @@ -61,7 +71,7 @@ class Object extends JSON::Object { class Array extends JSON::Array { Object getObject(int i) { result = this.getChild(i) } - string getString(int i) { result = this.getChild(i).(JSON::String).getChild().getValue() } + string getString(int i) { result = getJsonString(this.getChild(i)) } int getNumber(int i) { result = this.getChild(i).(JSON::Number).getValue().toInt() } diff --git a/ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll b/ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll index 877f676e396..562af993d89 100644 --- a/ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll +++ b/ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll @@ -1767,13 +1767,15 @@ module JSON { final override string getAPrimaryQlClass() { result = "ReservedWord" } } + class UnderscoreValue extends @json_underscore_value, AstNode { } + /** A class representing `array` nodes. */ class Array extends @json_array, AstNode { /** Gets the name of the primary QL class for this element. */ final override string getAPrimaryQlClass() { result = "Array" } /** Gets the `i`th child of this node. */ - final Value getChild(int i) { json_array_child(this, i, result) } + final UnderscoreValue getChild(int i) { json_array_child(this, i, result) } /** Gets a field or child node of this node. */ final override AstNode getAFieldOrChild() { json_array_child(this, _, result) } @@ -1791,12 +1793,18 @@ module JSON { final override string getAPrimaryQlClass() { result = "Document" } /** Gets the `i`th child of this node. */ - final Value getChild(int i) { json_document_child(this, i, result) } + final UnderscoreValue getChild(int i) { json_document_child(this, i, result) } /** Gets a field or child node of this node. */ final override AstNode getAFieldOrChild() { json_document_child(this, _, result) } } + /** A class representing `escape_sequence` tokens. */ + class EscapeSequence extends @json_token_escape_sequence, Token { + /** Gets the name of the primary QL class for this element. */ + final override string getAPrimaryQlClass() { result = "EscapeSequence" } + } + /** A class representing `false` tokens. */ class False extends @json_token_false, Token { /** Gets the name of the primary QL class for this element. */ @@ -1833,10 +1841,10 @@ module JSON { final override string getAPrimaryQlClass() { result = "Pair" } /** Gets the node corresponding to the field `key`. */ - final AstNode getKey() { json_pair_def(this, result, _) } + final String getKey() { json_pair_def(this, result, _) } /** Gets the node corresponding to the field `value`. */ - final Value getValue() { json_pair_def(this, _, result) } + final UnderscoreValue getValue() { json_pair_def(this, _, result) } /** Gets a field or child node of this node. */ final override AstNode getAFieldOrChild() { @@ -1849,11 +1857,11 @@ module JSON { /** Gets the name of the primary QL class for this element. */ final override string getAPrimaryQlClass() { result = "String" } - /** Gets the child of this node. */ - final StringContent getChild() { json_string_child(this, result) } + /** Gets the `i`th child of this node. */ + final AstNode getChild(int i) { json_string_child(this, i, result) } /** Gets a field or child node of this node. */ - final override AstNode getAFieldOrChild() { json_string_child(this, result) } + final override AstNode getAFieldOrChild() { json_string_child(this, _, result) } } /** A class representing `string_content` tokens. */ @@ -1867,6 +1875,4 @@ module JSON { /** Gets the name of the primary QL class for this element. */ final override string getAPrimaryQlClass() { result = "True" } } - - class Value extends @json_value, AstNode { } } diff --git a/ql/ql/src/ql.dbscheme b/ql/ql/src/ql.dbscheme index 21aebc3b431..e36eec681ed 100644 --- a/ql/ql/src/ql.dbscheme +++ b/ql/ql/src/ql.dbscheme @@ -1239,11 +1239,13 @@ blame_ast_node_parent( ); /*- JSON dbscheme -*/ +@json_underscore_value = @json_array | @json_object | @json_string__ | @json_token_false | @json_token_null | @json_token_number | @json_token_true + #keyset[json_array, index] json_array_child( int json_array: @json_array ref, int index: int ref, - unique int child: @json_value ref + unique int child: @json_underscore_value ref ); json_array_def( @@ -1254,7 +1256,7 @@ json_array_def( json_document_child( int json_document: @json_document ref, int index: int ref, - unique int child: @json_value ref + unique int child: @json_underscore_value ref ); json_document_def( @@ -1272,25 +1274,25 @@ json_object_def( unique int id: @json_object ); -@json_pair_key_type = @json_string__ | @json_token_number - json_pair_def( unique int id: @json_pair, - int key__: @json_pair_key_type ref, - int value: @json_value ref + int key__: @json_string__ ref, + int value: @json_underscore_value ref ); +@json_string_child_type = @json_token_escape_sequence | @json_token_string_content + +#keyset[json_string__, index] json_string_child( - unique int json_string__: @json_string__ ref, - unique int child: @json_token_string_content ref + int json_string__: @json_string__ ref, + int index: int ref, + unique int child: @json_string_child_type ref ); json_string_def( unique int id: @json_string__ ); -@json_value = @json_array | @json_object | @json_string__ | @json_token_false | @json_token_null | @json_token_number | @json_token_true - json_tokeninfo( unique int id: @json_token, int kind: int ref, @@ -1300,11 +1302,12 @@ json_tokeninfo( case @json_token.kind of 0 = @json_reserved_word | 1 = @json_token_comment -| 2 = @json_token_false -| 3 = @json_token_null -| 4 = @json_token_number -| 5 = @json_token_string_content -| 6 = @json_token_true +| 2 = @json_token_escape_sequence +| 3 = @json_token_false +| 4 = @json_token_null +| 5 = @json_token_number +| 6 = @json_token_string_content +| 7 = @json_token_true ; diff --git a/ql/rust-toolchain.toml b/ql/rust-toolchain.toml index 04b7b3d5fd6..57d004b953c 100644 --- a/ql/rust-toolchain.toml +++ b/ql/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.68" +channel = "1.70" profile = "minimal" -components = [ "rustfmt" ] +components = [ "rustfmt" ] \ No newline at end of file diff --git a/ruby/extractor/Cargo.lock b/ruby/extractor/Cargo.lock index 750d630402b3719e2181b0b68b6e4d1f8dbe107c..15b1c3e61fef8d9a9fb39b26689473e97bbc8375 100644 GIT binary patch delta 691 zcma)3&1#e}6y+;e1Vu#bU1nre8$U01u^!s#Sb0Y#8n3gn zi5wWOVlduBiA+9;kdKL2Th3Za_8N?m&|Wb`!IbOgty`SD;F2ee zIcTwB(J@BwT0*jtRI`OxSPU8EFcEO$-m1SQSL@lNca4sh@7~gqcGM7tzRm}_-k#j4 zC-khIKKW2znCS?8LYtargdj0VbVWQ;<4#yS=Y&e24Z(=hA+|_`G5`v*E>!B_{oc9U z>TL4uW@qYwosF^tW{9!nlQU#)_rg8{C$>Dz8 znD2GEew;s_SCSCD%tNq2}yZ-^s C*wDNH delta 804 zcma))J!=#|5QaHpLO!sF#N?tNM}mk%kDdLR8O0czQ&I4e#>OP>?(AaJC|Xs#_yUQ>$@67Y=w_lwPzdL&yopII2zOK&)!)qzA@fNtS zFF^w&hz#BnVKkIowkiU%YtKmRV#Wl8O^}>WA%wc$pRbpurt0xy-O=UFkI4?c-C18% zViYT{$%6(ts54@boiWimDuk9zG%DEW3UcrYB~oz$5CTK(VR!U(T2Bm9&b}yRia1K% zw#ky$Tu>-#jD$*jK}v47*OUl=`}{k}NSgFZ9_w^uH7OzjO|^nT^t;u!W#h4H&< z6ruPwqPE3{#Gs2MHn}L$)(WcVl}AfTbb(-fW~UN+hTNhoAa#FizOJpzoa`NvR!bMB zdrP}{H$8mO#OCU`8&~Sn!Ote$*%_`DQZ^y!c44Nrni!q6V973kauO(KqrEK(Sm_4L zC(3OAN!pUBdt(a+TC;yw`d075pk8VDh>!|u{u(6iWVq16XlGxg2(`TFtxO#Qrd>QLrt?QWk~x6&G$#z7TH76Vz+{$eT|vQZ66 ZB#SO&Ts~^zzjapX%LkLAPumM;{{SON<*5Jw diff --git a/ruby/extractor/Cargo.toml b/ruby/extractor/Cargo.toml index 87a9f9f7a80..d85f64d4f13 100644 --- a/ruby/extractor/Cargo.toml +++ b/ruby/extractor/Cargo.toml @@ -3,10 +3,10 @@ name = "codeql-extractor-ruby" description = "CodeQL Ruby extractor" version = "0.1.0" authors = ["GitHub"] -edition = "2018" +edition = "2021" # When changing/updating these, the `cargo-bazel-lock.json` file has to be regenerated. -# Run `CARGO_BAZEL_REPIN=true CARGO_BAZEL_REPIN_ONLY=ruby_deps ./build --bazel sync --only=ruby_deps` +# Run `CARGO_BAZEL_REPIN=true CARGO_BAZEL_REPIN_ONLY=ruby_deps ./tools/bazel sync --only=ruby_deps` # in the `semmle-code` repository to do so. # For more information, check out the documentation at # https://bazelbuild.github.io/rules_rust/crate_universe.html#repinning--updating-dependencies @@ -15,9 +15,9 @@ edition = "2018" # (c.f. https://github.com/bazelbuild/rules_rust/issues/2452). # Warning: The process takes >5min on my M1 mac, so do wait for a while. [dependencies] -tree-sitter = "0.20" -tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "203f7bd3c1bbfbd98fc19add4b8fcb213c059205" } -tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "4d9ad3f010fdc47a8433adcf9ae30c8eb8475ae7" } +tree-sitter = ">= 0.22.6" +tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "38d5004a797298dc42c85e7706c5ceac46a3f29f" } +tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "788a63ca1b7619288980aaafd37d890ee2469421" } clap = { version = "4.2", features = ["derive"] } tracing = "0.1" tracing-subscriber = { version = "0.3.3", features = ["env-filter"] } diff --git a/ruby/extractor/rust-toolchain.toml b/ruby/extractor/rust-toolchain.toml index e56467f055c..1295f479382 100644 --- a/ruby/extractor/rust-toolchain.toml +++ b/ruby/extractor/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.68" +channel = "1.70" profile = "minimal" components = [ "rustfmt" ] diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 0614d25eb16..b287d297164 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -109,7 +109,7 @@ pub fn run(options: Options) -> std::io::Result<()> { if path.extension().map_or(false, |x| x == "erb") { tracing::info!("scanning: {}", path.display()); extractor::extract( - erb, + &erb, "erb", &erb_schema, &mut diagnostics_writer, @@ -120,7 +120,7 @@ pub fn run(options: Options) -> std::io::Result<()> { ); let (ranges, line_breaks) = scan_erb( - erb, + &erb, &source, erb_directive_id, erb_output_directive_id, @@ -196,7 +196,7 @@ pub fn run(options: Options) -> std::io::Result<()> { code_ranges = vec![]; } extractor::extract( - language, + &language, "ruby", &schema, &mut diagnostics_writer, @@ -249,7 +249,7 @@ fn write_trap( } fn scan_erb( - erb: Language, + erb: &Language, source: &[u8], directive_id: u16, output_directive_id: u16, diff --git a/shared/tree-sitter-extractor/Cargo.toml b/shared/tree-sitter-extractor/Cargo.toml index 66e5e7e38c4..96b5c498e80 100644 --- a/shared/tree-sitter-extractor/Cargo.toml +++ b/shared/tree-sitter-extractor/Cargo.toml @@ -7,7 +7,7 @@ authors = ["GitHub"] [dependencies] flate2 = "1.0" globset = "0.4" -tree-sitter = "0.20" +tree-sitter = ">= 0.22.6" tracing = "0.1" rayon = "1.5.0" regex = "1.7.1" @@ -20,5 +20,5 @@ num_cpus = "1.14.0" [dev-dependencies] tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql" } -tree-sitter-json = {git = "https://github.com/tausbn/tree-sitter-json" } +tree-sitter-json = {git = "https://github.com/tree-sitter/tree-sitter-json" } rand = "0.8.5" diff --git a/shared/tree-sitter-extractor/rust-toolchain.toml b/shared/tree-sitter-extractor/rust-toolchain.toml index 9582cce2e6e..7fe5bcb46f8 100644 --- a/shared/tree-sitter-extractor/rust-toolchain.toml +++ b/shared/tree-sitter-extractor/rust-toolchain.toml @@ -2,6 +2,6 @@ # extractor. It is set to the lowest version of Rust we want to support. [toolchain] -channel = "1.68" +channel = "1.70" profile = "minimal" components = [ "clippy", "rustfmt" ] \ No newline at end of file diff --git a/shared/tree-sitter-extractor/src/extractor/mod.rs b/shared/tree-sitter-extractor/src/extractor/mod.rs index d26e5e45975..54ae50fd69e 100644 --- a/shared/tree-sitter-extractor/src/extractor/mod.rs +++ b/shared/tree-sitter-extractor/src/extractor/mod.rs @@ -150,7 +150,7 @@ fn location_label( /// Extracts the source file at `path`, which is assumed to be canonicalized. pub fn extract( - language: Language, + language: &Language, language_prefix: &str, schema: &NodeTypeMap, diagnostics_writer: &mut diagnostics::LogWriter, diff --git a/shared/tree-sitter-extractor/src/extractor/simple.rs b/shared/tree-sitter-extractor/src/extractor/simple.rs index b2ef1486232..baf620d19a3 100644 --- a/shared/tree-sitter-extractor/src/extractor/simple.rs +++ b/shared/tree-sitter-extractor/src/extractor/simple.rs @@ -137,7 +137,7 @@ impl Extractor { let lang = &self.languages[i]; crate::extractor::extract( - lang.ts_language, + &lang.ts_language, lang.prefix, &schemas[i], &mut diagnostics_writer,