diff --git a/Cargo.lock b/Cargo.lock index 046bb12a1f8..5406d36570b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,12 +9,53 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + [[package]] name = "cc" version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed67cbde08356238e75fc4656be4749481eeffb09e19f320a25237d5221c985d" +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "generator" version = "0.1.0" @@ -22,6 +63,15 @@ dependencies = [ "node-types", ] +[[package]] +name = "hermit-abi" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8" +dependencies = [ + "libc", +] + [[package]] name = "itoa" version = "0.4.6" @@ -34,6 +84,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "libc" +version = "0.2.79" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" + [[package]] name = "memchr" version = "2.3.3" @@ -89,6 +145,9 @@ name = "ruby-extractor" version = "0.1.0" dependencies = [ "cc", + "clap", + "serde", + "serde_json", "tree-sitter", ] @@ -129,6 +188,12 @@ dependencies = [ "serde", ] +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + [[package]] name = "syn" version = "1.0.46" @@ -140,6 +205,15 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thread_local" version = "1.0.1" @@ -159,8 +233,42 @@ dependencies = [ "regex", ] +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + [[package]] name = "unicode-xid" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/extractor/Cargo.toml b/extractor/Cargo.toml index 5a0bbbaf343..5189c2b93da 100644 --- a/extractor/Cargo.toml +++ b/extractor/Cargo.toml @@ -8,6 +8,9 @@ edition = "2018" [dependencies] tree-sitter = "0.17.0" - +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +clap = "2.33" [build-dependencies] cc="*" + diff --git a/extractor/build.rs b/extractor/build.rs index 2b849cd3ba5..c152a1f4943 100644 --- a/extractor/build.rs +++ b/extractor/build.rs @@ -2,10 +2,13 @@ use std::path::PathBuf; fn main() { let dir: PathBuf = ["../tree-sitter-ruby", "src"].iter().collect(); - - cc::Build::new() + let mut build = cc::Build::new(); + build .include(&dir) - .file(dir.join("parser.c")) - .file(dir.join("scanner.cc")) - .compile("tree-sitter-ruby"); + .file(&dir.join("parser.c")) + .file(&dir.join("scanner.cc")); + if !cfg!(windows) { + build.cpp(true).compiler("clang"); + } + build.compile("tree-sitter-ruby"); } diff --git a/extractor/src/extractor.rs b/extractor/src/extractor.rs new file mode 100644 index 00000000000..b724ce384a2 --- /dev/null +++ b/extractor/src/extractor.rs @@ -0,0 +1,489 @@ +use super::nodes_types::{Entry, Field, Storage, TypeName}; + +use std::collections::BTreeMap as Map; +use std::collections::BTreeSet as Set; +use std::fmt; +use std::path::Path; +use tree_sitter::{Language, Node, Parser, Tree}; + +pub struct Extractor { + pub parser: Parser, + pub schema: Vec, +} + +pub fn create(language: Language, schema: Vec) -> Extractor { + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + + Extractor { parser, schema } +} +impl Extractor { + pub fn 
extract<'a>(&'a mut self, path: &Path) -> std::io::Result { + let source = std::fs::read(&path)?; + let tree = &self + .parser + .parse(&source, None) + .expect("Failed to parse file"); + let mut visitor = Visitor { + source: &source, + trap_output: vec![TrapEntry::Comment(format!( + "Auto-generated TRAP file for {}", + path.display() + ))], + counter: -1, + // TODO: should we handle path strings that are not valid UTF8 better? + path: format!("{}", path.display()), + stack: Vec::new(), + tables: build_schema_lookup(&self.schema), + union_types: build_union_type_lookup(&self.schema), + }; + traverse(&tree, &mut visitor); + + &self.parser.reset(); + Ok(Program(visitor.trap_output)) + } +} + +fn build_schema_lookup<'a>(schema: &'a Vec) -> Map<&'a TypeName, &'a Entry> { + let mut map = std::collections::BTreeMap::new(); + for entry in schema { + if let Entry::Table { type_name, .. } = entry { + map.insert(type_name, entry); + } + } + map +} + +fn build_union_type_lookup<'a>(schema: &'a Vec) -> Map<&'a TypeName, &'a Set> { + let mut union_types = std::collections::BTreeMap::new(); + for entry in schema { + if let Entry::Union { type_name, members } = entry { + union_types.insert(type_name, members); + } + } + union_types +} + +struct Visitor<'a> { + /// The file path of the source code (as string) + path: String, + /// The source code as a UTF-8 byte array + source: &'a Vec, + /// The accumulated trap entries + trap_output: Vec, + /// A counter for generating fresh labels + counter: i32, + /// A lookup table from type name to dbscheme table entries + tables: Map<&'a TypeName, &'a Entry>, + /// A lookup table for union types mapping a type name to its direct members + union_types: Map<&'a TypeName, &'a Set>, + /// A stack for gathering information from child nodes. Whenever a node is entered + /// an empty list is pushed. All children append their data (field name, label, type) to + /// the list. 
When the visitor leaves a node the list containing the child data is popped + /// from the stack and matched against the dbscheme for the node. If the expectations are met + /// the corresponding row definitions are added to the trap_output. + stack: Vec, Label, TypeName)>>, +} + +impl Visitor<'_> { + fn enter_node(&mut self, node: Node) -> bool { + if node.is_error() { + println!( + "error: {}:{}: parse error", + &self.path, + node.start_position().row, + ); + return false; + } + if node.is_missing() { + println!( + "error: {}:{}: parse error: expecting '{}'", + &self.path, + node.start_position().row, + node.kind() + ); + return false; + } + + if node.is_extra() { + return false; + } + + self.stack.push(Vec::new()); + return true; + } + + fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) { + if node.is_extra() || node.is_error() || node.is_missing() { + return; + } + let child_nodes = self.stack.pop().expect("Vistor: empty stack"); + let table = self.tables.get(&TypeName { + kind: node.kind().to_owned(), + named: node.is_named(), + }); + if let Some(Entry::Table { fields, .. 
}) = table { + self.counter += 1; + let id = Label::Normal(self.counter); + let loc = Label::Location(self.counter); + self.trap_output.push(TrapEntry::New(id)); + self.trap_output.push(TrapEntry::New(loc)); + self.trap_output.push(location_for(&self.path, loc, node)); + let table_name = node_type_name(node.kind(), node.is_named()); + let args: Option>; + if fields.is_empty() { + args = Some(vec![sliced_source_arg(self.source, node)]); + } else { + args = self.complex_node(&node, fields, child_nodes, id); + } + if let Some(args) = args { + self.trap_output + .push(TrapEntry::Definition(table_name, id, args, loc)); + } + if let Some(parent) = self.stack.last_mut() { + parent.push(( + field_name, + id, + TypeName { + kind: node.kind().to_owned(), + named: node.is_named(), + }, + )) + }; + } else { + println!( + "error: {}:{}: unknown table type: '{}'", + &self.path, + node.start_position().row, + node.kind() + ); + } + } + fn complex_node( + &mut self, + node: &Node, + fields: &Vec, + child_nodes: Vec<(Option<&str>, Label, TypeName)>, + parent_id: Label, + ) -> Option> { + let mut map: Map<&Option, (&Field, Vec