diff --git a/Cargo.lock b/Cargo.lock index f8bdf27a548..c311de0b5fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,21 +58,15 @@ checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] name = "byteorder" -version = "1.3.4" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" [[package]] name = "cc" -version = "1.0.65" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95752358c8f7552394baf48cd82695b345628ad3f170d607de3ca03b8dacca15" - -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" [[package]] name = "cfg-if" @@ -110,9 +104,9 @@ dependencies = [ [[package]] name = "const_fn" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd51eab21ab4fd6a3bf889e2d0958c0a6e3a61ad04260325e919e652a2a62826" +checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6" [[package]] name = "crc32fast" @@ -120,7 +114,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -129,7 +123,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", ] @@ -139,7 +133,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] @@ -150,7 +144,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "const_fn", "crossbeam-utils", "lazy_static", @@ -165,7 +159,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" dependencies = [ "autocfg", - "cfg-if 1.0.0", + "cfg-if", "lazy_static", ] @@ -177,43 +171,30 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "flate2" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7411863d55df97a419aa64cb4d2f167103ea9d767e2c54a1868b7ac3f6b47129" +checksum = "cd3aec53de10fe96d7d8c565eb17f2c687bb5518a2ec453b5b1252964526abe0" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crc32fast", "libc", "miniz_oxide", ] -[[package]] -name = "generator" -version = "0.6.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cdc09201b2e8ca1b19290cf7e65de2246b8e91fb6874279722189c4de7b94dc" -dependencies = [ - "cc", - "libc", - "log", - "rustc_version", - "winapi", -] - [[package]] name = "hermit-abi" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8" +checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" dependencies = [ "libc", ] [[package]] name = "itoa" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "lazy_static" @@ -223,30 +204,17 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.80" +version = "0.2.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614" +checksum = "7ccac4b00700875e6a07c6cde370d44d32fa01c5a65cdd2fca6858c479d28bb3" [[package]] name = "log" -version = "0.4.11" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if 0.1.10", -] - -[[package]] -name = "loom" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0e8460f2f2121162705187214720353c517b97bdfb3494c0b1e33d83ebe4bed" -dependencies = [ - "cfg-if 0.1.10", - "generator", - "scoped-tls", - "serde", - "serde_json", + "cfg-if", ] [[package]] @@ -321,10 +289,16 @@ dependencies = [ ] [[package]] -name = "pin-project-lite" -version = "0.2.0" +name = "once_cell" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b063f57ec186e6140e2b8b6921e5f1bd89c7356dda5b33acc5401203ca6131c" +checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" + +[[package]] +name = "pin-project-lite" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439697af366c49a6d0a010c56a0d97685bc140ce0d377b13a2ea2aa42d64a827" [[package]] name = "proc-macro2" @@ -337,9 +311,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" dependencies = [ "proc-macro2", ] @@ -371,9 +345,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.4.2" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c" +checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" dependencies = [ "aho-corasick", "memchr", @@ -393,9 +367,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.21" +version = "0.6.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" [[package]] name = "ruby-extractor" @@ -406,9 +380,11 @@ dependencies = [ "node-types", "num_cpus", "rayon", + "regex", "tracing", "tracing-subscriber", "tree-sitter", + "tree-sitter-embedded-template", "tree-sitter-ruby", ] @@ -422,62 +398,32 @@ dependencies = [ "tree-sitter-ruby", ] -[[package]] -name = "rustc_version" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" -dependencies = [ - "semver", -] - [[package]] name = "ryu" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" -[[package]] -name = "scoped-tls" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" - [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "semver" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" -dependencies = [ - "semver-parser", -] - -[[package]] -name = "semver-parser" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" - [[package]] name = "serde" -version = "1.0.117" +version = "1.0.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a" +checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.117" +version = "1.0.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e" +checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31" dependencies = [ "proc-macro2", "quote", @@ -486,9 +432,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.60" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1500e84d27fe482ed1dc791a56eddc2f230046a040fa908c08bda1d9fb615779" +checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a" dependencies = [ "itoa", "ryu", @@ -497,19 +443,18 @@ dependencies = [ [[package]] name = "sharded-slab" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4921be914e16899a80adefb821f8ddb7974e3f1250223575a44ed994882127" +checksum = "79c719719ee05df97490f80a45acfc99e5a30ce98a1e4fb67aee422745ae14e3" dependencies = [ "lazy_static", - "loom", ] [[package]] name = "smallvec" -version = "1.5.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7acad6f34eb9e8a259d3283d1e8c1d34d7415943d4895f65cc73813c7396fc85" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" [[package]] name = "strsim" @@ -519,9 +464,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "syn" -version = "1.0.53" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8833e20724c24de12bbaba5ad230ea61c3eafb05b881c7c9d3cfe8638b187e68" +checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" dependencies = [ "proc-macro2", "quote", @@ -539,11 +484,11 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.0.1" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd" dependencies = [ - "lazy_static", + "once_cell", ] [[package]] @@ -559,11 +504,11 @@ dependencies = [ [[package]] name = "tracing" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f47026cdc4080c07e49b37087de021820269d996f581aac150ef9e5583eefe3" +checksum = "f7d40a22fd029e33300d8d89a5cc8ffce18bb7c587662f54629e94c9de5487f3" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -571,9 +516,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e0ccfc3378da0cce270c946b676a376943f5cd16aeba64568e7939806f4ada" +checksum = "43f080ea7e4107844ef4766459426fa2d5c1ada2e47edba05dc7fa99d9629f47" dependencies = [ "proc-macro2", "quote", @@ -642,6 +587,15 @@ dependencies = [ "regex", ] +[[package]] +name = "tree-sitter-embedded-template" +version = "0.17.0" +source = "git+https://github.com/aibaars/tree-sitter-embedded-template?rev=d4aac29c08aa7c596633d00b5ec2dd2d247eafe4#d4aac29c08aa7c596633d00b5ec2dd2d247eafe4" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "tree-sitter-ruby" version = "0.17.0" diff --git a/codeql-extractor.yml b/codeql-extractor.yml index c4000c4c656..23a2c659502 100644 --- a/codeql-extractor.yml +++ b/codeql-extractor.yml @@ -7,4 +7,8 @@ file_types: - name: ruby display_name: Ruby files extensions: - - .rb + - .rb + - name: ERB + display_name: Ruby templates + extensions: + - .erb diff --git a/extractor/Cargo.toml b/extractor/Cargo.toml index e5c450126ec..dedc2732551 100644 --- a/extractor/Cargo.toml +++ b/extractor/Cargo.toml @@ -10,9 +10,11 @@ edition = "2018" flate2 = "1.0" node-types = { path = "../node-types" } tree-sitter = "0.17" +tree-sitter-embedded-template = { git = "https://github.com/aibaars/tree-sitter-embedded-template", rev = "d4aac29c08aa7c596633d00b5ec2dd2d247eafe4" } tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "add8cb36d5fc0a00d4499ba2e8eedc04a38a2488" } clap = "2.33" tracing = "0.1" tracing-subscriber = { version = "0.2", features = ["env-filter"] } rayon = "1.5.0" num_cpus = "1.13.0" +regex = "1.4.3" diff --git a/extractor/src/extractor.rs b/extractor/src/extractor.rs index 69f0a72db45..980d9562519 100644 --- a/extractor/src/extractor.rs +++ b/extractor/src/extractor.rs @@ -4,7 +4,7 @@ use std::collections::BTreeSet as Set; use std::fmt; use std::path::Path; use tracing::{error, info, span, Level}; -use tree_sitter::{Language, Node, Parser, Tree}; +use tree_sitter::{Language, Node, Parser, Range, Tree}; struct TrapWriter { /// The accumulated trap entries @@ -149,7 +149,13 @@ impl TrapWriter { } /// Extracts the source file at `path`, which is assumed to be canonicalized. -pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io::Result { +pub fn extract( + language: Language, + schema: &NodeTypeMap, + path: &Path, + source: &Vec, + ranges: &[Range], +) -> std::io::Result { let span = span!( Level::TRACE, "extract", @@ -162,7 +168,7 @@ pub fn extract(language: Language, schema: &NodeTypeMap, path: &Path) -> std::io let mut parser = Parser::new(); parser.set_language(language).unwrap(); - let source = std::fs::read(&path)?; + parser.set_included_ranges(&ranges).unwrap(); let tree = parser.parse(&source, None).expect("Failed to parse file"); let mut trap_writer = new_trap_writer(); trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display())); diff --git a/extractor/src/main.rs b/extractor/src/main.rs index f4e5bddd889..d6880a7a90d 100644 --- a/extractor/src/main.rs +++ b/extractor/src/main.rs @@ -8,6 +8,7 @@ use rayon::prelude::*; use std::fs; use std::io::{BufRead, BufWriter, Write}; use std::path::{Path, PathBuf}; +use tree_sitter::{Language, Parser, Range}; enum TrapCompression { None, @@ -126,6 +127,7 @@ fn main() -> std::io::Result<()> { let file_list = fs::File::open(file_list)?; let language = tree_sitter_ruby::language(); + let erb = tree_sitter_embedded_template::language(); let schema = node_types::read_node_types_str(tree_sitter_ruby::NODE_TYPES)?; let lines: std::io::Result> = std::io::BufReader::new(file_list).lines().collect(); let lines = lines?; @@ -133,16 +135,28 @@ fn main() -> std::io::Result<()> { let path = PathBuf::from(line).canonicalize()?; let trap_file = path_for(&trap_dir, &path, trap_compression.extension()); let src_archive_file = path_for(&src_archive_dir, &path, ""); - let trap = extractor::extract(language, &schema, &path)?; + let mut source = std::fs::read(&path)?; + let code_ranges; + if path.extension().map_or(false, |x| x == "erb") { + tracing::info!("scanning: {}", path.display()); + let (ranges, line_breaks) = scan_erb(erb, &source); + for i in line_breaks { + if i < source.len() { + source[i] = b'\n'; + } + } + code_ranges = ranges; + } else { + code_ranges = vec![]; + } + let trap = extractor::extract(language, &schema, &path, &source, &code_ranges)?; std::fs::create_dir_all(&src_archive_file.parent().unwrap())?; std::fs::copy(&path, &src_archive_file)?; std::fs::create_dir_all(&trap_file.parent().unwrap())?; let trap_file = std::fs::File::create(&trap_file)?; let mut trap_file = BufWriter::new(trap_file); match trap_compression { - TrapCompression::None => { - write!(trap_file, "{}", trap) - } + TrapCompression::None => write!(trap_file, "{}", trap), TrapCompression::Gzip => { let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast()); write!(compressed_writer, "{}", trap) @@ -151,6 +165,42 @@ fn main() -> std::io::Result<()> { }) } +fn scan_erb(erb: Language, source: &std::vec::Vec) -> (Vec, Vec) { + let mut parser = Parser::new(); + parser.set_language(erb).unwrap(); + let tree = parser.parse(&source, None).expect("Failed to parse file"); + let mut result = Vec::new(); + let mut line_breaks = vec![]; + + for n in tree.root_node().children(&mut tree.walk()) { + let kind = n.kind(); + if kind == "directive" || kind == "output_directive" { + for c in n.children(&mut tree.walk()) { + if c.kind() == "code" { + let mut range = c.range(); + if range.end_byte < source.len() { + line_breaks.push(range.end_byte); + range.end_byte += 1; + range.end_point.column += 1; + } + result.push(range); + } + } + } + } + if result.len() == 0 { + let root = tree.root_node(); + // Add an empty range at the end of the file + result.push(Range { + start_byte: root.end_byte(), + end_byte: root.end_byte(), + start_point: root.end_position(), + end_point: root.end_position(), + }); + } + (result, line_breaks) +} + fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf { let mut result = PathBuf::from(dir); for component in path.components() { diff --git a/tools/autobuild.cmd b/tools/autobuild.cmd index 023c8d9be16..2d88262927f 100644 --- a/tools/autobuild.cmd +++ b/tools/autobuild.cmd @@ -2,6 +2,7 @@ type NUL && "%CODEQL_DIST%\codeql.exe" database index-files ^ --include-extension=.rb ^ + --include-extension=.erb ^ --size-limit=5m ^ --language=ruby ^ "%CODEQL_EXTRACTOR_RUBY_WIP_DATABASE%" diff --git a/tools/autobuild.sh b/tools/autobuild.sh index c529ac1d16d..b50fd89f8aa 100755 --- a/tools/autobuild.sh +++ b/tools/autobuild.sh @@ -4,6 +4,7 @@ set -eu exec "${CODEQL_DIST}/codeql" database index-files \ --include-extension=.rb \ + --include-extension=.erb \ --size-limit=5m \ --language=ruby \ --working-dir=.\