Merge pull request #12546 from hmac/extractor-shared-library

Introduce a shared extractor library
Arthur Baars
2023-03-27 11:32:33 +02:00
committed by GitHub
41 changed files with 152 additions and 2509 deletions
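The bulk of the deletions below comes from moving the generic tree-sitter extractor machinery (diagnostics, extractor, file_paths, generator, node_types, trap) out of the Ruby extractor into the shared codeql-extractor crate, which the Ruby crate now pulls in as a path dependency. For the Ruby sources the change amounts to an import swap along these lines (a sketch based on the hunks below):

// Before: the modules lived in the Ruby crate itself.
use ruby_extractor::{diagnostics, extractor, file_paths, node_types, trap};

// After: the same modules come from the shared crate, declared in Cargo.toml as
// codeql-extractor = { path = "../../shared/tree-sitter-extractor" }
use codeql_extractor::{diagnostics, extractor, file_paths, node_types, trap};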

View File

@@ -24,6 +24,6 @@ runs:
if: steps.cache-extractor.outputs.cache-hit != 'true'
shell: bash
run: |
cargo install cross --version 0.2.1
cargo install cross --version 0.2.5
scripts/create-extractor-pack.sh
working-directory: ruby

Binary file not shown.

View File

@@ -7,7 +7,6 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
flate2 = "1.0"
tree-sitter = "0.20"
tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "203f7bd3c1bbfbd98fc19add4b8fcb213c059205" }
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "206c7077164372c596ffa8eaadb9435c28941364" }
@@ -15,10 +14,7 @@ clap = "3.0"
tracing = "0.1"
tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }
rayon = "1.5.0"
num_cpus = "1.14.0"
regex = "1.7.1"
encoding = "0.2"
lazy_static = "1.4.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
chrono = { version = "0.4.19", features = ["serde"] }
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }

View File

@@ -1,2 +1,8 @@
[target.x86_64-unknown-linux-gnu]
image = "centos/devtoolset-7-toolchain-centos7"
[build.env]
# Provide the path to the shared extractor
# Cross mounts this directory as a volume, so builds inside the docker container
# can see it.
volumes = ["__CODEQL-EXTRACTOR=../../shared/tree-sitter-extractor"]

View File

@@ -1,9 +1,8 @@
#[macro_use]
extern crate lazy_static;
extern crate num_cpus;
use clap::arg;
use encoding::{self};
use encoding;
use rayon::prelude::*;
use std::borrow::Cow;
use std::fs;
@@ -11,33 +10,7 @@ use std::io::BufRead;
use std::path::{Path, PathBuf};
use tree_sitter::{Language, Parser, Range};
use ruby_extractor::{diagnostics, extractor, file_paths, node_types, trap};
/**
* Gets the number of threads the extractor should use, by reading the
* CODEQL_THREADS environment variable and using it as described in the
* extractor spec:
*
* "If the number is positive, it indicates the number of threads that should
* be used. If the number is negative or zero, it should be added to the number
* of cores available on the machine to determine how many threads to use
* (minimum of 1). If unspecified, should be considered as set to -1."
*/
fn num_codeql_threads() -> Result<usize, String> {
let threads_str = std::env::var("CODEQL_THREADS").unwrap_or_else(|_| "-1".to_owned());
match threads_str.parse::<i32>() {
Ok(num) if num <= 0 => {
let reduction = -num as usize;
Ok(std::cmp::max(1, num_cpus::get() - reduction))
}
Ok(num) => Ok(num as usize),
Err(_) => Err(format!(
"Unable to parse CODEQL_THREADS value '{}'",
&threads_str
)),
}
}
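For concreteness, the spec quoted above works out as follows (a hedged illustration assuming a hypothetical 8-core machine):

// CODEQL_THREADS unset -> treated as "-1" -> max(1, 8 - 1) = 7 threads
// CODEQL_THREADS=0     -> max(1, 8 - 0) = 8 threads
// CODEQL_THREADS=-7    -> max(1, 8 - 7) = 1 thread
// CODEQL_THREADS=4     -> 4 threads
// CODEQL_THREADS=abc   -> Err("Unable to parse CODEQL_THREADS value 'abc'")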
use codeql_extractor::{diagnostics, extractor, file_paths, node_types, trap};
lazy_static! {
static ref CP_NUMBER: regex::Regex = regex::Regex::new("cp([0-9]+)").unwrap();
@@ -67,7 +40,7 @@ fn main() -> std::io::Result<()> {
.init();
let diagnostics = diagnostics::DiagnosticLoggers::new("ruby");
let mut main_thread_logger = diagnostics.logger();
let num_threads = match num_codeql_threads() {
let num_threads = match codeql_extractor::options::num_threads() {
Ok(num) => num,
Err(e) => {
main_thread_logger.write(
@@ -307,8 +280,10 @@ fn scan_erb(
}
}
}
if result.is_empty() {
let root = tree.root_node();
// Add an empty range at the end of the file
result.push(Range {
start_byte: root.end_byte(),

View File

@@ -6,8 +6,8 @@ use std::io::LineWriter;
use std::io::Write;
use std::path::PathBuf;
use ruby_extractor::generator::{dbscheme, language::Language, ql, ql_gen};
use ruby_extractor::node_types;
use codeql_extractor::generator::{dbscheme, language::Language, ql, ql_gen};
use codeql_extractor::node_types;
/// Given the name of the parent node, and its field information, returns a pair,
/// the first of which is the field's type. The second is an optional dbscheme

View File

@@ -1,364 +0,0 @@
use serde::Serialize;
use std::io::Write;
use std::path::PathBuf;
/** SARIF severity */
#[derive(Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
Error,
Warning,
#[allow(unused)]
Note,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Source {
/** An identifier under which it makes sense to group this diagnostic message. This is used to build the SARIF reporting descriptor object.*/
pub id: String,
/** Display name for the ID. This is used to build the SARIF reporting descriptor object. */
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
/** Name of the CodeQL extractor. This is used to identify which tool component the reporting descriptor object should be nested under in SARIF.*/
pub extractor_name: Option<String>,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Visibility {
#[serde(skip_serializing_if = "std::ops::Not::not")]
/** True if the message should be displayed on the status page (defaults to false) */
pub status_page: bool,
#[serde(skip_serializing_if = "std::ops::Not::not")]
/** True if the message should be counted in the diagnostics summary table printed by `codeql database analyze` (defaults to false) */
pub cli_summary_table: bool,
#[serde(skip_serializing_if = "std::ops::Not::not")]
/** True if the message should be sent to telemetry (defaults to false) */
pub telemetry: bool,
}
#[derive(Serialize, Clone, Default)]
#[serde(rename_all = "camelCase")]
pub struct Location {
#[serde(skip_serializing_if = "Option::is_none")]
/** Path to the affected file if appropriate, relative to the source root */
pub file: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub start_line: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub start_column: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub end_line: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub end_column: Option<usize>,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DiagnosticMessage {
/** Timestamp (UTC) at which the message was created */
pub timestamp: chrono::DateTime<chrono::Utc>,
pub source: Source,
#[serde(skip_serializing_if = "String::is_empty")]
/** GitHub flavored Markdown formatted message. Should include inline links to any help pages. */
pub markdown_message: String,
#[serde(skip_serializing_if = "String::is_empty")]
/** Plain text message. Used by components where the string processing needed to support Markdown is cumbersome. */
pub plaintext_message: String,
#[serde(skip_serializing_if = "Vec::is_empty")]
/** List of help links intended to supplement the `plaintextMessage`. */
pub help_links: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub severity: Option<Severity>,
#[serde(skip_serializing_if = "std::ops::Not::not")]
pub internal: bool,
#[serde(skip_serializing_if = "is_default_visibility")]
pub visibility: Visibility,
#[serde(skip_serializing_if = "Option::is_none")]
pub location: Option<Location>,
}
fn is_default_visibility(v: &Visibility) -> bool {
!v.cli_summary_table && !v.status_page && !v.telemetry
}
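Given the serde attributes above, a message written by LogWriter::write below ends up as one JSON line per entry, roughly like this (values are illustrative; empty strings, None fields, and all-false visibility are omitted):

// {"timestamp":"2023-03-27T09:32:33Z",
//  "source":{"id":"ruby/parse-error","name":"Could not process some files due to syntax errors","extractorName":"ruby"},
//  "plaintextMessage":"A parse error occurred.",
//  "markdownMessage":"A parse error occurred.",
//  "severity":"warning",
//  "visibility":{"statusPage":true},
//  "location":{"file":"app/models/user.rb","startLine":3,"startColumn":1,"endLine":3,"endColumn":7}}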
pub struct LogWriter {
extractor: String,
path: Option<PathBuf>,
inner: Option<std::io::BufWriter<std::fs::File>>,
}
impl LogWriter {
pub fn new_entry(&self, id: &str, name: &str) -> DiagnosticMessage {
DiagnosticMessage {
timestamp: chrono::Utc::now(),
source: Source {
id: format!("{}/{}", self.extractor, id),
name: name.to_owned(),
extractor_name: Some(self.extractor.to_owned()),
},
markdown_message: String::new(),
plaintext_message: String::new(),
help_links: vec![],
severity: None,
internal: false,
visibility: Visibility {
cli_summary_table: false,
status_page: false,
telemetry: false,
},
location: None,
}
}
pub fn write(&mut self, mesg: &DiagnosticMessage) {
let full_error_message = mesg.full_error_message();
match mesg.severity {
Some(Severity::Error) => tracing::error!("{}", full_error_message),
Some(Severity::Warning) => tracing::warn!("{}", full_error_message),
Some(Severity::Note) => tracing::info!("{}", full_error_message),
None => tracing::debug!("{}", full_error_message),
}
if self.inner.is_none() {
if let Some(path) = self.path.as_ref() {
match std::fs::OpenOptions::new()
.create(true)
.append(true)
.write(true)
.open(&path)
{
Err(e) => {
tracing::error!(
"Could not open log file '{}': {}",
&path.to_string_lossy(),
e
);
self.path = None;
self.inner = None
}
Ok(file) => self.inner = Some(std::io::BufWriter::new(file)),
}
}
}
if let Some(mut writer) = self.inner.as_mut() {
serde_json::to_writer(&mut writer, mesg)
.unwrap_or_else(|e| tracing::debug!("Failed to write log entry: {}", e));
writer
.write_all(b"\n")
.unwrap_or_else(|e| tracing::debug!("Failed to write log entry: {}", e));
}
}
}
pub struct DiagnosticLoggers {
extractor: String,
root: Option<PathBuf>,
}
impl DiagnosticLoggers {
pub fn new(extractor: &str) -> Self {
let env_var = format!(
"CODEQL_EXTRACTOR_{}_DIAGNOSTIC_DIR",
extractor.to_ascii_uppercase()
);
let root = match std::env::var(&env_var) {
Err(e) => {
tracing::error!("{}: {}", e, &env_var);
None
}
Ok(dir) => {
if let Err(e) = std::fs::create_dir_all(&dir) {
tracing::error!("Failed to create log directory {}: {}", &dir, e);
None
} else {
Some(PathBuf::from(dir))
}
}
};
DiagnosticLoggers {
extractor: extractor.to_owned(),
root,
}
}
pub fn logger(&self) -> LogWriter {
thread_local! {
static THREAD_NUM: usize = {
static COUNT: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
COUNT.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
};
}
THREAD_NUM.with(|n| LogWriter {
extractor: self.extractor.to_owned(),
inner: None,
path: self
.root
.as_ref()
.map(|root| root.to_owned().join(format!("extractor_{}.jsonl", n))),
})
}
}
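A minimal usage sketch of this API (the extractor name, file path, and message are hypothetical; the call shape mirrors the Ruby extractor code further down):

let diagnostics = DiagnosticLoggers::new("ruby");
let mut logger = diagnostics.logger();
let mut mesg = logger.new_entry("parse-error", "Could not process some files due to syntax errors");
mesg.severity(Severity::Warning)
    .file("app/models/user.rb")
    .message("A parse error occurred near {}.", &[MessageArg::Code("def")]);
logger.write(&mesg); // appends one JSON line to extractor_<n>.jsonl in the diagnostics directory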
/// Length of the longest run of consecutive backticks in `text`; used to pick
/// a code fence long enough to wrap the text in Markdown.
fn longest_backtick_sequence_length(text: &str) -> usize {
let mut result = 0;
let mut count = 0;
for c in text.chars() {
if c == '`' {
count += 1;
if count > result {
result = count;
}
} else {
count = 0;
}
}
result
}
/// An argument of a diagnostic message format string.
/// A message argument is either a "code" snippet or a link.
pub enum MessageArg<'a> {
Code(&'a str),
Link(&'a str, &'a str),
}
impl DiagnosticMessage {
pub fn full_error_message(&self) -> String {
match &self.location {
Some(Location {
file: Some(path),
start_line: None,
..
}) => format!("{}: {}", path, self.plaintext_message),
Some(Location {
file: Some(path),
start_line: Some(line),
..
}) => format!("{}:{}: {}", path, line, self.plaintext_message),
_ => self.plaintext_message.to_owned(),
}
}
fn text(&mut self, text: &str) -> &mut Self {
self.plaintext_message = text.to_owned();
self
}
pub fn message(&mut self, text: &str, args: &[MessageArg]) -> &mut Self {
let parts = text.split("{}");
let mut plain = String::with_capacity(2 * text.len());
let mut markdown = String::with_capacity(2 * text.len());
for (i, p) in parts.enumerate() {
plain.push_str(p);
markdown.push_str(p);
match args.get(i) {
Some(MessageArg::Code(t)) => {
plain.push_str(t);
if t.len() > 0 {
let count = longest_backtick_sequence_length(t) + 1;
markdown.push_str(&"`".repeat(count));
if count > 1 {
markdown.push_str(" ");
}
markdown.push_str(t);
if count > 1 {
markdown.push_str(" ");
}
markdown.push_str(&"`".repeat(count));
}
}
Some(MessageArg::Link(text, url)) => {
plain.push_str(text);
self.help_link(url);
markdown.push_str("[");
markdown.push_str(text);
markdown.push_str("](");
markdown.push_str(url);
markdown.push_str(")");
}
None => {}
}
}
self.text(&plain);
self.markdown(&markdown);
self
}
pub fn markdown(&mut self, text: &str) -> &mut Self {
self.markdown_message = text.to_owned();
self
}
pub fn severity(&mut self, severity: Severity) -> &mut Self {
self.severity = Some(severity);
self
}
#[allow(unused)]
pub fn help_link(&mut self, link: &str) -> &mut Self {
self.help_links.push(link.to_owned());
self
}
#[allow(unused)]
pub fn internal(&mut self) -> &mut Self {
self.internal = true;
self
}
#[allow(unused)]
pub fn cli_summary_table(&mut self) -> &mut Self {
self.visibility.cli_summary_table = true;
self
}
pub fn status_page(&mut self) -> &mut Self {
self.visibility.status_page = true;
self
}
#[allow(unused)]
pub fn telemetry(&mut self) -> &mut Self {
self.visibility.telemetry = true;
self
}
pub fn file(&mut self, path: &str) -> &mut Self {
let loc = self.location.get_or_insert(Default::default());
loc.file = Some(path.to_owned());
self
}
pub fn location(
&mut self,
path: &str,
start_line: usize,
start_column: usize,
end_line: usize,
end_column: usize,
) -> &mut Self {
let loc = self.location.get_or_insert(Default::default());
loc.file = Some(path.to_owned());
loc.start_line = Some(start_line);
loc.start_column = Some(start_column);
loc.end_line = Some(end_line);
loc.end_column = Some(end_column);
self
}
}
#[test]
fn test_message() {
let mut m = DiagnosticLoggers::new("foo")
.logger()
.new_entry("id", "name");
m.message("hello: {}", &[MessageArg::Code("hello")]);
assert_eq!("hello: hello", m.plaintext_message);
assert_eq!("hello: `hello`", m.markdown_message);
let mut m = DiagnosticLoggers::new("foo")
.logger()
.new_entry("id", "name");
m.message(
"hello with backticks: {}",
&[MessageArg::Code("oh `hello`!")],
);
assert_eq!("hello with backticks: oh `hello`!", m.plaintext_message);
assert_eq!(
"hello with backticks: `` oh `hello`! ``",
m.markdown_message
);
}

View File

@@ -1,647 +0,0 @@
use crate::diagnostics;
use crate::file_paths;
use crate::node_types::{self, EntryKind, Field, NodeTypeMap, Storage, TypeName};
use crate::trap;
use std::collections::BTreeMap as Map;
use std::collections::BTreeSet as Set;
use std::fmt;
use std::path::Path;
use tree_sitter::{Language, Node, Parser, Range, Tree};
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
let (file_label, fresh) = writer.global_id(&trap::full_id_for_file(
&file_paths::normalize_path(absolute_path),
));
if fresh {
writer.add_tuple(
"files",
vec![
trap::Arg::Label(file_label),
trap::Arg::String(file_paths::normalize_path(absolute_path)),
],
);
populate_parent_folders(writer, file_label, absolute_path.parent());
}
file_label
}
fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label {
let (file_label, fresh) = writer.global_id("empty;sourcefile");
if fresh {
writer.add_tuple(
"files",
vec![
trap::Arg::Label(file_label),
trap::Arg::String("".to_string()),
],
);
}
file_label
}
pub fn populate_empty_location(writer: &mut trap::Writer) {
let file_label = populate_empty_file(writer);
location(writer, file_label, 0, 0, 0, 0);
}
pub fn populate_parent_folders(
writer: &mut trap::Writer,
child_label: trap::Label,
path: Option<&Path>,
) {
let mut path = path;
let mut child_label = child_label;
loop {
match path {
None => break,
Some(folder) => {
let (folder_label, fresh) = writer.global_id(&trap::full_id_for_folder(
&file_paths::normalize_path(folder),
));
writer.add_tuple(
"containerparent",
vec![
trap::Arg::Label(folder_label),
trap::Arg::Label(child_label),
],
);
if fresh {
writer.add_tuple(
"folders",
vec![
trap::Arg::Label(folder_label),
trap::Arg::String(file_paths::normalize_path(folder)),
],
);
path = folder.parent();
child_label = folder_label;
} else {
break;
}
}
}
}
}
fn location(
writer: &mut trap::Writer,
file_label: trap::Label,
start_line: usize,
start_column: usize,
end_line: usize,
end_column: usize,
) -> trap::Label {
let (loc_label, fresh) = writer.global_id(&format!(
"loc,{{{}}},{},{},{},{}",
file_label, start_line, start_column, end_line, end_column
));
if fresh {
writer.add_tuple(
"locations_default",
vec![
trap::Arg::Label(loc_label),
trap::Arg::Label(file_label),
trap::Arg::Int(start_line),
trap::Arg::Int(start_column),
trap::Arg::Int(end_line),
trap::Arg::Int(end_column),
],
);
}
loc_label
}
/// Extracts the source file at `path`, which is assumed to be canonicalized.
pub fn extract(
language: Language,
language_prefix: &str,
schema: &NodeTypeMap,
diagnostics_writer: &mut diagnostics::LogWriter,
trap_writer: &mut trap::Writer,
path: &Path,
source: &[u8],
ranges: &[Range],
) {
let path_str = file_paths::normalize_path(&path);
let span = tracing::span!(
tracing::Level::TRACE,
"extract",
file = %path_str
);
let _enter = span.enter();
tracing::info!("extracting: {}", path_str);
let mut parser = Parser::new();
parser.set_language(language).unwrap();
parser.set_included_ranges(ranges).unwrap();
let tree = parser.parse(&source, None).expect("Failed to parse file");
trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str));
let file_label = populate_file(trap_writer, path);
let mut visitor = Visitor::new(
source,
diagnostics_writer,
trap_writer,
// TODO: should we handle path strings that are not valid UTF8 better?
&path_str,
file_label,
language_prefix,
schema,
);
traverse(&tree, &mut visitor);
parser.reset();
}
struct ChildNode {
field_name: Option<&'static str>,
label: trap::Label,
type_name: TypeName,
}
struct Visitor<'a> {
/// The file path of the source code (as string)
path: &'a str,
/// The label to use whenever we need to refer to the `@file` entity of this
/// source file.
file_label: trap::Label,
/// The source code as a UTF-8 byte array
source: &'a [u8],
/// A diagnostics::LogWriter to write diagnostic messages
diagnostics_writer: &'a mut diagnostics::LogWriter,
/// A trap::Writer to accumulate trap entries
trap_writer: &'a mut trap::Writer,
/// A counter for top-level child nodes
toplevel_child_counter: usize,
/// Language-specific name of the AST info table
ast_node_info_table_name: String,
/// Language-specific name of the tokeninfo table
tokeninfo_table_name: String,
/// A lookup table from type name to node types
schema: &'a NodeTypeMap,
/// A stack for gathering information from child nodes. Whenever a node is
/// entered, the parent's [Label], a child counter, and an empty child list are
/// pushed. All children append their data to that list. When the visitor
/// leaves a node, the list containing the child data is popped from the stack
/// and matched against the dbscheme for the node. If the expectations are met,
/// the corresponding row definitions are added to the TRAP output.
stack: Vec<(trap::Label, usize, Vec<ChildNode>)>,
}
impl<'a> Visitor<'a> {
fn new(
source: &'a [u8],
diagnostics_writer: &'a mut diagnostics::LogWriter,
trap_writer: &'a mut trap::Writer,
path: &'a str,
file_label: trap::Label,
language_prefix: &str,
schema: &'a NodeTypeMap,
) -> Visitor<'a> {
Visitor {
path,
file_label,
source,
diagnostics_writer,
trap_writer,
toplevel_child_counter: 0,
ast_node_info_table_name: format!("{}_ast_node_info", language_prefix),
tokeninfo_table_name: format!("{}_tokeninfo", language_prefix),
schema,
stack: Vec::new(),
}
}
fn record_parse_error(&mut self, loc: trap::Label, mesg: &diagnostics::DiagnosticMessage) {
self.diagnostics_writer.write(mesg);
let id = self.trap_writer.fresh_id();
let full_error_message = mesg.full_error_message();
let severity_code = match mesg.severity {
Some(diagnostics::Severity::Error) => 40,
Some(diagnostics::Severity::Warning) => 30,
Some(diagnostics::Severity::Note) => 20,
None => 10,
};
self.trap_writer.add_tuple(
"diagnostics",
vec![
trap::Arg::Label(id),
trap::Arg::Int(severity_code),
trap::Arg::String("parse_error".to_string()),
trap::Arg::String(mesg.plaintext_message.to_owned()),
trap::Arg::String(full_error_message),
trap::Arg::Label(loc),
],
);
}
fn record_parse_error_for_node(
&mut self,
message: &str,
args: &[diagnostics::MessageArg],
node: Node,
status_page: bool,
) {
let (start_line, start_column, end_line, end_column) = location_for(self, node);
let loc = location(
self.trap_writer,
self.file_label,
start_line,
start_column,
end_line,
end_column,
);
let mut mesg = self.diagnostics_writer.new_entry(
"parse-error",
"Could not process some files due to syntax errors",
);
mesg
.severity(diagnostics::Severity::Warning)
.location(self.path, start_line, start_column, end_line, end_column)
.message(message, args);
if status_page {
mesg.status_page();
}
self.record_parse_error(loc, &mesg);
}
fn enter_node(&mut self, node: Node) -> bool {
if node.is_missing() {
self.record_parse_error_for_node(
"A parse error occurred (expected {} symbol). Check the syntax of the file. If the file is invalid, correct the error or {} the file from analysis.",
&[diagnostics::MessageArg::Code(node.kind()), diagnostics::MessageArg::Link("exclude", "https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/customizing-code-scanning")],
node,
true,
);
return false;
}
if node.is_error() {
self.record_parse_error_for_node(
"A parse error occurred. Check the syntax of the file. If the file is invalid, correct the error or {} the file from analysis.",
&[diagnostics::MessageArg::Link("exclude", "https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/customizing-code-scanning")],
node,
true,
);
return false;
};
let id = self.trap_writer.fresh_id();
self.stack.push((id, 0, Vec::new()));
true
}
fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) {
if node.is_error() || node.is_missing() {
return;
}
let (id, _, child_nodes) = self.stack.pop().expect("Visitor: empty stack");
let (start_line, start_column, end_line, end_column) = location_for(self, node);
let loc = location(
self.trap_writer,
self.file_label,
start_line,
start_column,
end_line,
end_column,
);
let table = self
.schema
.get(&TypeName {
kind: node.kind().to_owned(),
named: node.is_named(),
})
.unwrap();
let mut valid = true;
let (parent_id, parent_index) = match self.stack.last_mut() {
Some(p) if !node.is_extra() => {
p.1 += 1;
(p.0, p.1 - 1)
}
_ => {
self.toplevel_child_counter += 1;
(self.file_label, self.toplevel_child_counter - 1)
}
};
match &table.kind {
EntryKind::Token { kind_id, .. } => {
self.trap_writer.add_tuple(
&self.ast_node_info_table_name,
vec![
trap::Arg::Label(id),
trap::Arg::Label(parent_id),
trap::Arg::Int(parent_index),
trap::Arg::Label(loc),
],
);
self.trap_writer.add_tuple(
&self.tokeninfo_table_name,
vec![
trap::Arg::Label(id),
trap::Arg::Int(*kind_id),
sliced_source_arg(self.source, node),
],
);
}
EntryKind::Table {
fields,
name: table_name,
} => {
if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) {
self.trap_writer.add_tuple(
&self.ast_node_info_table_name,
vec![
trap::Arg::Label(id),
trap::Arg::Label(parent_id),
trap::Arg::Int(parent_index),
trap::Arg::Label(loc),
],
);
let mut all_args = vec![trap::Arg::Label(id)];
all_args.extend(args);
self.trap_writer.add_tuple(table_name, all_args);
}
}
_ => {
self.record_parse_error(
loc,
self.diagnostics_writer
.new_entry(
"parse-error",
"Could not process some files due to syntax errors",
)
.severity(diagnostics::Severity::Warning)
.location(self.path, start_line, start_column, end_line, end_column)
.message(
"Unknown table type: {}",
&[diagnostics::MessageArg::Code(node.kind())],
),
);
valid = false;
}
}
if valid && !node.is_extra() {
// Extra nodes are independent root nodes and do not belong to the parent node
// Therefore we should not register them in the parent vector
if let Some(parent) = self.stack.last_mut() {
parent.2.push(ChildNode {
field_name,
label: id,
type_name: TypeName {
kind: node.kind().to_owned(),
named: node.is_named(),
},
});
};
}
}
fn complex_node(
&mut self,
node: &Node,
fields: &[Field],
child_nodes: &[ChildNode],
parent_id: trap::Label,
) -> Option<Vec<trap::Arg>> {
let mut map: Map<&Option<String>, (&Field, Vec<trap::Arg>)> = Map::new();
for field in fields {
map.insert(&field.name, (field, Vec::new()));
}
for child_node in child_nodes {
if let Some((field, values)) = map.get_mut(&child_node.field_name.map(|x| x.to_owned()))
{
//TODO: handle error and missing nodes
if self.type_matches(&child_node.type_name, &field.type_info) {
if let node_types::FieldTypeInfo::ReservedWordInt(int_mapping) =
&field.type_info
{
// We can safely unwrap because type_matches checks the key is in the map.
let (int_value, _) = int_mapping.get(&child_node.type_name.kind).unwrap();
values.push(trap::Arg::Int(*int_value));
} else {
values.push(trap::Arg::Label(child_node.label));
}
} else if field.name.is_some() {
self.record_parse_error_for_node(
"Type mismatch for field {}::{} with type {} != {}",
&[
diagnostics::MessageArg::Code(node.kind()),
diagnostics::MessageArg::Code(child_node.field_name.unwrap_or("child")),
diagnostics::MessageArg::Code(&format!("{:?}", child_node.type_name)),
diagnostics::MessageArg::Code(&format!("{:?}", field.type_info)),
],
*node,
false,
);
}
} else if child_node.field_name.is_some() || child_node.type_name.named {
self.record_parse_error_for_node(
"Value for unknown field: {}::{} and type {}",
&[
diagnostics::MessageArg::Code(node.kind()),
diagnostics::MessageArg::Code(&child_node.field_name.unwrap_or("child")),
diagnostics::MessageArg::Code(&format!("{:?}", child_node.type_name)),
],
*node,
false,
);
}
}
let mut args = Vec::new();
let mut is_valid = true;
for field in fields {
let child_values = &map.get(&field.name).unwrap().1;
match &field.storage {
Storage::Column { name: column_name } => {
if child_values.len() == 1 {
args.push(child_values.first().unwrap().clone());
} else {
is_valid = false;
let error_message = format!(
"{} for field: {}::{}",
if child_values.is_empty() {
"Missing value"
} else {
"Too many values"
},
node.kind(),
column_name
);
self.record_parse_error_for_node(&error_message, &[], *node, false);
}
}
Storage::Table {
name: table_name,
has_index,
column_name: _,
} => {
for (index, child_value) in child_values.iter().enumerate() {
if !*has_index && index > 0 {
self.record_parse_error_for_node(
"Too many values for field: {}::{}",
&[
diagnostics::MessageArg::Code(node.kind()),
diagnostics::MessageArg::Code(table_name),
],
*node,
false,
);
break;
}
let mut args = vec![trap::Arg::Label(parent_id)];
if *has_index {
args.push(trap::Arg::Int(index))
}
args.push(child_value.clone());
self.trap_writer.add_tuple(table_name, args);
}
}
}
}
if is_valid {
Some(args)
} else {
None
}
}
fn type_matches(&self, tp: &TypeName, type_info: &node_types::FieldTypeInfo) -> bool {
match type_info {
node_types::FieldTypeInfo::Single(single_type) => {
if tp == single_type {
return true;
}
if let EntryKind::Union { members } = &self.schema.get(single_type).unwrap().kind {
if self.type_matches_set(tp, members) {
return true;
}
}
}
node_types::FieldTypeInfo::Multiple { types, .. } => {
return self.type_matches_set(tp, types);
}
node_types::FieldTypeInfo::ReservedWordInt(int_mapping) => {
return !tp.named && int_mapping.contains_key(&tp.kind)
}
}
false
}
fn type_matches_set(&self, tp: &TypeName, types: &Set<TypeName>) -> bool {
if types.contains(tp) {
return true;
}
for other in types.iter() {
if let EntryKind::Union { members } = &self.schema.get(other).unwrap().kind {
if self.type_matches_set(tp, members) {
return true;
}
}
}
false
}
}
// Emit a slice of a source file as an Arg.
fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg {
let range = n.byte_range();
trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned())
}
// Computes the CodeQL location (1-based line and column, inclusive start and
// end) for the given node.
fn location_for(visitor: &mut Visitor, n: Node) -> (usize, usize, usize, usize) {
// Tree-sitter row and column values are 0-based, while CodeQL starts
// counting at 1. In addition, tree-sitter's end position is exclusive,
// while CodeQL's end position is inclusive. This means all values should be
// incremented by 1, and the end position additionally needs to be shifted one
// column to the left. In most cases this amounts to incrementing every value
// except the end column; however, when the end column is 0 (the start of a
// line), the end position must instead be set to the end of the previous
// line.
let start_line = n.start_position().row + 1;
let start_col = n.start_position().column + 1;
let mut end_line = n.end_position().row + 1;
let mut end_col = n.end_position().column;
if start_line > end_line || start_line == end_line && start_col > end_col {
// the range is empty, clip it to sensible values
end_line = start_line;
end_col = start_col - 1;
} else if end_col == 0 {
let source = visitor.source;
// end_col = 0 means that we are at the start of a line
// unfortunately 0 is invalid as column number, therefore
// we should update the end location to be the end of the
// previous line
let mut index = n.end_byte();
if index > 0 && index <= source.len() {
index -= 1;
if source[index] != b'\n' {
visitor.diagnostics_writer.write(
visitor
.diagnostics_writer
.new_entry("internal-error", "Internal error")
.message("Expecting a line break symbol, but none found while correcting end column value", &[])
.severity(diagnostics::Severity::Error),
);
}
end_line -= 1;
end_col = 1;
while index > 0 && source[index - 1] != b'\n' {
index -= 1;
end_col += 1;
}
} else {
visitor.diagnostics_writer.write(
visitor
.diagnostics_writer
.new_entry("internal-error", "Internal error")
.message(
"Cannot correct end column value: end_byte index {} is not in range [1,{}].",
&[
diagnostics::MessageArg::Code(&index.to_string()),
diagnostics::MessageArg::Code(&source.len().to_string()),
],
)
.severity(diagnostics::Severity::Error),
);
}
}
(start_line, start_col, end_line, end_col)
}
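A hedged worked example of the conversion (node contents and positions are hypothetical):

// tree-sitter: a token spanning row 0, columns 0..3 (end column exclusive)
//   -> CodeQL: start line 1, start column 1, end line 1, end column 3
// tree-sitter: a node whose end position is row 3, column 0 (just after a newline)
//   -> CodeQL: the end is moved back to the last column of line 3, i.e. the
//      end of the previous line, since column 0 is not a valid CodeQL position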
fn traverse(tree: &Tree, visitor: &mut Visitor) {
let cursor = &mut tree.walk();
visitor.enter_node(cursor.node());
let mut recurse = true;
loop {
if recurse && cursor.goto_first_child() {
recurse = visitor.enter_node(cursor.node());
} else {
visitor.leave_node(cursor.field_name(), cursor.node());
if cursor.goto_next_sibling() {
recurse = visitor.enter_node(cursor.node());
} else if cursor.goto_parent() {
recurse = false;
} else {
break;
}
}
}
}
// Numeric indices.
#[derive(Debug, Copy, Clone)]
struct Index(usize);
impl fmt::Display for Index {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
}
}

View File

@@ -1,135 +0,0 @@
use std::path::{Path, PathBuf};
/// Normalizes the path according to the common CodeQL specification. Assumes
/// that `path` has already been canonicalized using `std::fs::canonicalize`.
pub fn normalize_path(path: &Path) -> String {
if cfg!(windows) {
// The way Rust canonicalizes paths doesn't match the CodeQL spec, so we
// have to do a bit of work removing certain prefixes and replacing
// backslashes.
let mut components: Vec<String> = Vec::new();
for component in path.components() {
match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
components.push(format!("{}:", letter as char));
}
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
components.push(x.to_string_lossy().to_string());
}
std::path::Prefix::UNC(server, share)
| std::path::Prefix::VerbatimUNC(server, share) => {
components.push(server.to_string_lossy().to_string());
components.push(share.to_string_lossy().to_string());
}
},
std::path::Component::Normal(n) => {
components.push(n.to_string_lossy().to_string());
}
std::path::Component::RootDir => {}
std::path::Component::CurDir => {}
std::path::Component::ParentDir => {}
}
}
components.join("/")
} else {
// For other operating systems, we can use the canonicalized path
// without modifications.
format!("{}", path.display())
}
}
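A hedged illustration of the normalization (paths are hypothetical):

// Windows, canonicalized drive path:  \\?\C:\Users\me\project\app.rb  ->  "C:/Users/me/project/app.rb"
// Windows, UNC path:                  \\server\share\src\app.rb       ->  "server/share/src/app.rb"
// Other operating systems:            /home/me/project/app.rb         ->  "/home/me/project/app.rb" (unchanged)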
/// Convert a user-supplied path to an absolute path, and convert it to a verbatim path on Windows.
pub fn path_from_string(path: &str) -> PathBuf {
let mut path = PathBuf::from(path);
// make path absolute
if path.is_relative() {
path = std::env::current_dir().unwrap().join(path)
};
let mut components = path.components();
// make Windows paths verbatim (with `\\?\` prefixes) which allow for extended-length paths.
let mut result = match components.next() {
None => unreachable!("empty path"),
Some(component) => match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(drive) => {
let root = format!(r"\\?\{}:\", drive as char);
PathBuf::from(root)
}
std::path::Prefix::UNC(server, share) => {
let mut root = std::ffi::OsString::from(r"\\?\UNC\");
root.push(server);
root.push(r"\");
root.push(share);
PathBuf::from(root)
}
std::path::Prefix::Verbatim(_)
| std::path::Prefix::VerbatimUNC(_, _)
| std::path::Prefix::VerbatimDisk(_)
| std::path::Prefix::DeviceNS(_) => Path::new(&component).to_path_buf(),
},
_ => Path::new(&component).to_path_buf(),
},
};
// remove `.` and `..` components
for component in components {
match component {
std::path::Component::CurDir => continue,
std::path::Component::ParentDir => {
result.pop();
}
_ => result.push(component),
}
}
result
}
pub fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
let mut result = PathBuf::from(dir);
for component in path.components() {
match component {
std::path::Component::Prefix(prefix) => match prefix.kind() {
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
result.push(format!("{}_", letter as char))
}
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
result.push(x);
}
std::path::Prefix::UNC(server, share)
| std::path::Prefix::VerbatimUNC(server, share) => {
result.push("unc");
result.push(server);
result.push(share);
}
},
std::path::Component::RootDir => {
// skip
}
std::path::Component::Normal(_) => {
result.push(component);
}
std::path::Component::CurDir => {
// skip
}
std::path::Component::ParentDir => {
result.pop();
}
}
}
if !ext.is_empty() {
match result.extension() {
Some(x) => {
let mut new_ext = x.to_os_string();
new_ext.push(".");
new_ext.push(ext);
result.set_extension(new_ext);
}
None => {
result.set_extension(ext);
}
}
}
result
}
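path_for rebases a source path under dir and appends ext; a hedged illustration (paths are hypothetical):

// path_for(Path::new("/out"), Path::new("/home/me/app.rb"), "trap")
//   -> /out/home/me/app.rb.trap
// path_for(Path::new("/out"), Path::new(r"\\?\C:\src\app.rb"), "trap")   // on Windows
//   -> /out/C_/src/app.rb.trap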

View File

@@ -1,132 +0,0 @@
use std::collections::BTreeSet as Set;
use std::fmt;
use crate::generator::ql;
/// Represents a distinct entry in the database schema.
pub enum Entry<'a> {
/// An entry defining a database table.
Table(Table<'a>),
/// An entry defining a case split over an integer column of a table.
Case(Case<'a>),
/// An entry defining a type that is a union of other types.
Union(Union<'a>),
}
/// A table in the database schema.
pub struct Table<'a> {
pub name: &'a str,
pub columns: Vec<Column<'a>>,
pub keysets: Option<Vec<&'a str>>,
}
/// A union in the database schema.
pub struct Union<'a> {
pub name: &'a str,
pub members: Set<&'a str>,
}
/// A case split in the database schema.
pub struct Case<'a> {
pub name: &'a str,
pub column: &'a str,
pub branches: Vec<(usize, &'a str)>,
}
/// A column in a table.
pub struct Column<'a> {
pub db_type: DbColumnType,
pub name: &'a str,
pub unique: bool,
pub ql_type: ql::Type<'a>,
pub ql_type_is_ref: bool,
}
/// The database column type.
pub enum DbColumnType {
Int,
String,
}
impl<'a> fmt::Display for Case<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "case @{}.{} of", &self.name, &self.column)?;
let mut sep = " ";
for (c, tp) in &self.branches {
writeln!(f, "{} {} = @{}", sep, c, tp)?;
sep = "|";
}
writeln!(f, ";")
}
}
impl<'a> fmt::Display for Table<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(keyset) = &self.keysets {
write!(f, "#keyset[")?;
for (key_index, key) in keyset.iter().enumerate() {
if key_index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", key)?;
}
writeln!(f, "]")?;
}
writeln!(f, "{}(", self.name)?;
for (column_index, column) in self.columns.iter().enumerate() {
write!(f, " ")?;
if column.unique {
write!(f, "unique ")?;
}
write!(
f,
"{} ",
match column.db_type {
DbColumnType::Int => "int",
DbColumnType::String => "string",
}
)?;
write!(f, "{}: {}", column.name, column.ql_type)?;
if column.ql_type_is_ref {
write!(f, " ref")?;
}
if column_index + 1 != self.columns.len() {
write!(f, ",")?;
}
writeln!(f)?;
}
write!(f, ");")?;
Ok(())
}
}
impl<'a> fmt::Display for Union<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "@{} = ", self.name)?;
let mut first = true;
for member in &self.members {
if first {
first = false;
} else {
write!(f, " | ")?;
}
write!(f, "@{}", member)?;
}
Ok(())
}
}
/// Generates the dbscheme by writing the given dbscheme `entries` to the `file`.
pub fn write<'a>(file: &mut dyn std::io::Write, entries: &'a [Entry]) -> std::io::Result<()> {
for entry in entries {
match entry {
Entry::Case(case) => write!(file, "{}\n\n", case)?,
Entry::Table(table) => write!(file, "{}\n\n", table)?,
Entry::Union(union) => write!(file, "{}\n\n", union)?,
}
}
Ok(())
}
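To make the Display impls concrete, a small set of entries renders roughly like this (the table, union, and case names are illustrative):

// ruby_tokeninfo(
//   unique int id: @ruby_token,
//   int kind: int ref,
//   string value: string ref
// );
//
// @ruby_underscore_statement = @ruby_call | @ruby_if | @ruby_return
//
// case @ruby_token.kind of
//   0 = @ruby_token_comment
// | 1 = @ruby_token_identifier
// ;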

View File

@@ -1,4 +0,0 @@
pub struct Language {
pub name: String,
pub node_types: &'static str,
}

View File

@@ -1,4 +0,0 @@
pub mod dbscheme;
pub mod language;
pub mod ql;
pub mod ql_gen;

View File

@@ -1,295 +0,0 @@
use std::collections::BTreeSet;
use std::fmt;
#[derive(Clone, Eq, PartialEq, Hash)]
pub enum TopLevel<'a> {
Class(Class<'a>),
Import(Import<'a>),
Module(Module<'a>),
}
impl<'a> fmt::Display for TopLevel<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TopLevel::Import(imp) => write!(f, "{}", imp),
TopLevel::Class(cls) => write!(f, "{}", cls),
TopLevel::Module(m) => write!(f, "{}", m),
}
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Import<'a> {
pub module: &'a str,
pub alias: Option<&'a str>,
}
impl<'a> fmt::Display for Import<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "import {}", &self.module)?;
if let Some(name) = &self.alias {
write!(f, " as {}", name)?;
}
Ok(())
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Class<'a> {
pub qldoc: Option<String>,
pub name: &'a str,
pub is_abstract: bool,
pub supertypes: BTreeSet<Type<'a>>,
pub characteristic_predicate: Option<Expression<'a>>,
pub predicates: Vec<Predicate<'a>>,
}
impl<'a> fmt::Display for Class<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let Some(qldoc) = &self.qldoc {
write!(f, "/** {} */", qldoc)?;
}
if self.is_abstract {
write!(f, "abstract ")?;
}
write!(f, "class {} extends ", &self.name)?;
for (index, supertype) in self.supertypes.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", supertype)?;
}
writeln!(f, " {{ ")?;
if let Some(charpred) = &self.characteristic_predicate {
writeln!(
f,
" {}",
Predicate {
qldoc: None,
name: self.name,
overridden: false,
is_final: false,
return_type: None,
formal_parameters: vec![],
body: charpred.clone(),
}
)?;
}
for predicate in &self.predicates {
writeln!(f, " {}", predicate)?;
}
write!(f, "}}")?;
Ok(())
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Module<'a> {
pub qldoc: Option<String>,
pub name: &'a str,
pub body: Vec<TopLevel<'a>>,
}
impl<'a> fmt::Display for Module<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let Some(qldoc) = &self.qldoc {
write!(f, "/** {} */", qldoc)?;
}
writeln!(f, "module {} {{ ", self.name)?;
for decl in &self.body {
writeln!(f, " {}", decl)?;
}
write!(f, "}}")?;
Ok(())
}
}
// The QL type of a column.
#[derive(Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub enum Type<'a> {
/// Primitive `int` type.
Int,
/// Primitive `string` type.
String,
/// A database type that will need to be referred to with an `@` prefix.
At(&'a str),
/// A user-defined type.
Normal(&'a str),
}
impl<'a> fmt::Display for Type<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Type::Int => write!(f, "int"),
Type::String => write!(f, "string"),
Type::Normal(name) => write!(f, "{}", name),
Type::At(name) => write!(f, "@{}", name),
}
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub enum Expression<'a> {
Var(&'a str),
String(&'a str),
Integer(usize),
Pred(&'a str, Vec<Expression<'a>>),
And(Vec<Expression<'a>>),
Or(Vec<Expression<'a>>),
Equals(Box<Expression<'a>>, Box<Expression<'a>>),
Dot(Box<Expression<'a>>, &'a str, Vec<Expression<'a>>),
Aggregate {
name: &'a str,
vars: Vec<FormalParameter<'a>>,
range: Option<Box<Expression<'a>>>,
expr: Box<Expression<'a>>,
second_expr: Option<Box<Expression<'a>>>,
},
}
impl<'a> fmt::Display for Expression<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expression::Var(x) => write!(f, "{}", x),
Expression::String(s) => write!(f, "\"{}\"", s),
Expression::Integer(n) => write!(f, "{}", n),
Expression::Pred(n, args) => {
write!(f, "{}(", n)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
Expression::And(conjuncts) => {
if conjuncts.is_empty() {
write!(f, "any()")
} else {
for (index, conjunct) in conjuncts.iter().enumerate() {
if index > 0 {
write!(f, " and ")?;
}
write!(f, "({})", conjunct)?;
}
Ok(())
}
}
Expression::Or(disjuncts) => {
if disjuncts.is_empty() {
write!(f, "none()")
} else {
for (index, disjunct) in disjuncts.iter().enumerate() {
if index > 0 {
write!(f, " or ")?;
}
write!(f, "({})", disjunct)?;
}
Ok(())
}
}
Expression::Equals(a, b) => write!(f, "{} = {}", a, b),
Expression::Dot(x, member_pred, args) => {
write!(f, "{}.{}(", x, member_pred)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
Expression::Aggregate {
name,
vars,
range,
expr,
second_expr,
} => {
write!(f, "{}(", name)?;
if !vars.is_empty() {
for (index, var) in vars.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", var)?;
}
write!(f, " | ")?;
}
if let Some(range) = range {
write!(f, "{} | ", range)?;
}
write!(f, "{}", expr)?;
if let Some(second_expr) = second_expr {
write!(f, ", {}", second_expr)?;
}
write!(f, ")")
}
}
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct Predicate<'a> {
pub qldoc: Option<String>,
pub name: &'a str,
pub overridden: bool,
pub is_final: bool,
pub return_type: Option<Type<'a>>,
pub formal_parameters: Vec<FormalParameter<'a>>,
pub body: Expression<'a>,
}
impl<'a> fmt::Display for Predicate<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let Some(qldoc) = &self.qldoc {
write!(f, "/** {} */", qldoc)?;
}
if self.is_final {
write!(f, "final ")?;
}
if self.overridden {
write!(f, "override ")?;
}
match &self.return_type {
None => write!(f, "predicate ")?,
Some(return_type) => write!(f, "{} ", return_type)?,
}
write!(f, "{}(", self.name)?;
for (index, param) in self.formal_parameters.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", param)?;
}
write!(f, ") {{ {} }}", self.body)?;
Ok(())
}
}
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct FormalParameter<'a> {
pub name: &'a str,
pub param_type: Type<'a>,
}
impl<'a> fmt::Display for FormalParameter<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.param_type, self.name)
}
}
/// Generates a QL library by writing the given `elements` to the `file`.
pub fn write<'a>(file: &mut dyn std::io::Write, elements: &'a [TopLevel]) -> std::io::Result<()> {
for element in elements {
write!(file, "{}\n\n", &element)?;
}
Ok(())
}

View File

@@ -1,566 +0,0 @@
use std::collections::BTreeSet;
use crate::{generator::ql, node_types};
/// Creates the hard-coded `AstNode` class that acts as a supertype of all
/// classes we generate.
pub fn create_ast_node_class<'a>(ast_node: &'a str, node_info_table: &'a str) -> ql::Class<'a> {
// Default implementation of `toString` calls `this.getAPrimaryQlClass()`
let to_string = ql::Predicate {
qldoc: Some(String::from(
"Gets a string representation of this element.",
)),
name: "toString",
overridden: false,
is_final: false,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::Dot(
Box::new(ql::Expression::Var("this")),
"getAPrimaryQlClass",
vec![],
)),
),
};
let get_location = ql::Predicate {
name: "getLocation",
qldoc: Some(String::from("Gets the location of this element.")),
overridden: false,
is_final: true,
return_type: Some(ql::Type::Normal("L::Location")),
formal_parameters: vec![],
body: ql::Expression::Pred(
node_info_table,
vec![
ql::Expression::Var("this"),
ql::Expression::Var("_"), // parent
ql::Expression::Var("_"), // parent index
ql::Expression::Var("result"), // location
],
),
};
let get_a_field_or_child = create_none_predicate(
Some(String::from("Gets a field or child node of this node.")),
"getAFieldOrChild",
false,
Some(ql::Type::Normal("AstNode")),
);
let get_parent = ql::Predicate {
qldoc: Some(String::from("Gets the parent of this element.")),
name: "getParent",
overridden: false,
is_final: true,
return_type: Some(ql::Type::Normal("AstNode")),
formal_parameters: vec![],
body: ql::Expression::Pred(
node_info_table,
vec![
ql::Expression::Var("this"),
ql::Expression::Var("result"),
ql::Expression::Var("_"), // parent index
ql::Expression::Var("_"), // location
],
),
};
let get_parent_index = ql::Predicate {
qldoc: Some(String::from(
"Gets the index of this node among the children of its parent.",
)),
name: "getParentIndex",
overridden: false,
is_final: true,
return_type: Some(ql::Type::Int),
formal_parameters: vec![],
body: ql::Expression::Pred(
node_info_table,
vec![
ql::Expression::Var("this"),
ql::Expression::Var("_"), // parent
ql::Expression::Var("result"), // parent index
ql::Expression::Var("_"), // location
],
),
};
let get_a_primary_ql_class = ql::Predicate {
qldoc: Some(String::from(
"Gets the name of the primary QL class for this element.",
)),
name: "getAPrimaryQlClass",
overridden: false,
is_final: false,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::String("???")),
),
};
let get_primary_ql_classes = ql::Predicate {
qldoc: Some(
"Gets a comma-separated list of the names of the primary CodeQL \
classes to which this element belongs."
.to_owned(),
),
name: "getPrimaryQlClasses",
overridden: false,
is_final: false,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::Aggregate {
name: "concat",
vars: vec![],
range: None,
expr: Box::new(ql::Expression::Dot(
Box::new(ql::Expression::Var("this")),
"getAPrimaryQlClass",
vec![],
)),
second_expr: Some(Box::new(ql::Expression::String(","))),
}),
),
};
ql::Class {
qldoc: Some(String::from("The base class for all AST nodes")),
name: "AstNode",
is_abstract: false,
supertypes: vec![ql::Type::At(ast_node)].into_iter().collect(),
characteristic_predicate: None,
predicates: vec![
to_string,
get_location,
get_parent,
get_parent_index,
get_a_field_or_child,
get_a_primary_ql_class,
get_primary_ql_classes,
],
}
}
pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Class<'a> {
let tokeninfo_arity = 3; // id, kind, value
let get_value = ql::Predicate {
qldoc: Some(String::from("Gets the value of this token.")),
name: "getValue",
overridden: false,
is_final: true,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: create_get_field_expr_for_column_storage("result", tokeninfo, 1, tokeninfo_arity),
};
let to_string = ql::Predicate {
qldoc: Some(String::from(
"Gets a string representation of this element.",
)),
name: "toString",
overridden: true,
is_final: true,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::Dot(
Box::new(ql::Expression::Var("this")),
"getValue",
vec![],
)),
),
};
ql::Class {
qldoc: Some(String::from("A token.")),
name: "Token",
is_abstract: false,
supertypes: vec![ql::Type::At(token_type), ql::Type::Normal("AstNode")]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![
get_value,
to_string,
create_get_a_primary_ql_class("Token", false),
],
}
}
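Rendered through the ql Display impls above, the class built here comes out roughly as follows (the @ruby_token and ruby_tokeninfo names are illustrative; whitespace is tidied up):

// /** A token. */
// class Token extends @ruby_token, AstNode {
//   /** Gets the value of this token. */
//   final string getValue() { ruby_tokeninfo(this, _, result) }
//   /** Gets a string representation of this element. */
//   final override string toString() { result = this.getValue() }
//   /** Gets the name of the primary QL class for this element. */
//   override string getAPrimaryQlClass() { result = "Token" }
// }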
// Creates the `ReservedWord` class.
pub fn create_reserved_word_class(db_name: &str) -> ql::Class {
let class_name = "ReservedWord";
let get_a_primary_ql_class = create_get_a_primary_ql_class(class_name, true);
ql::Class {
qldoc: Some(String::from("A reserved word.")),
name: class_name,
is_abstract: false,
supertypes: vec![ql::Type::At(db_name), ql::Type::Normal("Token")]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![get_a_primary_ql_class],
}
}
/// Creates a predicate whose body is `none()`.
fn create_none_predicate<'a>(
qldoc: Option<String>,
name: &'a str,
overridden: bool,
return_type: Option<ql::Type<'a>>,
) -> ql::Predicate<'a> {
ql::Predicate {
qldoc,
name,
overridden,
is_final: false,
return_type,
formal_parameters: Vec::new(),
body: ql::Expression::Pred("none", vec![]),
}
}
/// Creates an overridden `getAPrimaryQlClass` predicate that returns the given
/// name.
fn create_get_a_primary_ql_class(class_name: &str, is_final: bool) -> ql::Predicate {
ql::Predicate {
qldoc: Some(String::from(
"Gets the name of the primary QL class for this element.",
)),
name: "getAPrimaryQlClass",
overridden: true,
is_final,
return_type: Some(ql::Type::String),
formal_parameters: vec![],
body: ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::String(class_name)),
),
}
}
/// Returns an expression to get a field that's defined as a column in the parent's table.
///
/// # Arguments
///
/// * `result_var_name` - the name of the variable to which the resulting value should be bound
/// * `table_name` - the name of the parent's defining table
/// * `column_index` - the index in that table that defines the field
/// * `arity` - the total number of columns in the table
fn create_get_field_expr_for_column_storage<'a>(
result_var_name: &'a str,
table_name: &'a str,
column_index: usize,
arity: usize,
) -> ql::Expression<'a> {
let num_underscores_before = column_index;
let num_underscores_after = arity - 2 - num_underscores_before;
ql::Expression::Pred(
table_name,
[
vec![ql::Expression::Var("this")],
vec![ql::Expression::Var("_"); num_underscores_before],
vec![ql::Expression::Var(result_var_name)],
vec![ql::Expression::Var("_"); num_underscores_after],
]
.concat(),
)
}
/// Returns an expression to get the field with the given index from its
/// auxiliary table. The index name can be "_" so the expression will hold for
/// all indices.
fn create_get_field_expr_for_table_storage<'a>(
result_var_name: &'a str,
table_name: &'a str,
index_var_name: Option<&'a str>,
) -> ql::Expression<'a> {
ql::Expression::Pred(
table_name,
match index_var_name {
Some(index_var_name) => vec![
ql::Expression::Var("this"),
ql::Expression::Var(index_var_name),
ql::Expression::Var(result_var_name),
],
None => vec![ql::Expression::Var("this"), ql::Expression::Var("result")],
},
)
}
/// Creates a pair consisting of a predicate to get the given field, and an
/// optional expression that will get the same field. When the field can occur
/// multiple times, the predicate will take an index argument, while the
/// expression will use the "don't care" expression to hold for all occurrences.
///
/// # Arguments
///
/// `main_table_name` - the name of the defining table for the parent node
/// `main_table_arity` - the number of columns in the main table
/// `main_table_column_index` - a mutable reference to a column index indicating
/// where the field is in the main table. If this is used (i.e. the field has
/// column storage), then the index is incremented.
/// `field` - the field whose getters we are creating
/// `nodes` - the map from type names to node entries, used to resolve the
/// field's QL class name
fn create_field_getters<'a>(
main_table_name: &'a str,
main_table_arity: usize,
main_table_column_index: &mut usize,
field: &'a node_types::Field,
nodes: &'a node_types::NodeTypeMap,
) -> (ql::Predicate<'a>, Option<ql::Expression<'a>>) {
let return_type = match &field.type_info {
node_types::FieldTypeInfo::Single(t) => {
Some(ql::Type::Normal(&nodes.get(t).unwrap().ql_class_name))
}
node_types::FieldTypeInfo::Multiple {
types: _,
dbscheme_union: _,
ql_class,
} => Some(ql::Type::Normal(ql_class)),
node_types::FieldTypeInfo::ReservedWordInt(_) => Some(ql::Type::String),
};
let formal_parameters = match &field.storage {
node_types::Storage::Column { .. } => vec![],
node_types::Storage::Table { has_index, .. } => {
if *has_index {
vec![ql::FormalParameter {
name: "i",
param_type: ql::Type::Int,
}]
} else {
vec![]
}
}
};
// For the expression to get a value, what variable name should the result
// be bound to?
let get_value_result_var_name = match &field.type_info {
node_types::FieldTypeInfo::ReservedWordInt(_) => "value",
node_types::FieldTypeInfo::Single(_) => "result",
node_types::FieldTypeInfo::Multiple { .. } => "result",
};
// Two expressions for getting the value. One that's suitable use in the
// getter predicate (where there may be a specific index), and another for
// use in `getAFieldOrChild` (where we use a "don't care" expression to
// match any index).
let (get_value, get_value_any_index) = match &field.storage {
node_types::Storage::Column { name: _ } => {
let column_index = *main_table_column_index;
*main_table_column_index += 1;
(
create_get_field_expr_for_column_storage(
get_value_result_var_name,
main_table_name,
column_index,
main_table_arity,
),
create_get_field_expr_for_column_storage(
get_value_result_var_name,
main_table_name,
column_index,
main_table_arity,
),
)
}
node_types::Storage::Table {
name: field_table_name,
has_index,
column_name: _,
} => (
create_get_field_expr_for_table_storage(
get_value_result_var_name,
field_table_name,
if *has_index { Some("i") } else { None },
),
create_get_field_expr_for_table_storage(
get_value_result_var_name,
field_table_name,
if *has_index { Some("_") } else { None },
),
),
};
let (body, optional_expr) = match &field.type_info {
node_types::FieldTypeInfo::ReservedWordInt(int_mapping) => {
// Create an expression that binds the corresponding string to `result` for each `value`, e.g.:
// result = "foo" and value = 0 or
// result = "bar" and value = 1 or
// result = "baz" and value = 2
let disjuncts = int_mapping
.iter()
.map(|(token_str, (value, _))| {
ql::Expression::And(vec![
ql::Expression::Equals(
Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::String(token_str)),
),
ql::Expression::Equals(
Box::new(ql::Expression::Var("value")),
Box::new(ql::Expression::Integer(*value)),
),
])
})
.collect();
(
ql::Expression::Aggregate {
name: "exists",
vars: vec![ql::FormalParameter {
name: "value",
param_type: ql::Type::Int,
}],
range: Some(Box::new(get_value)),
expr: Box::new(ql::Expression::Or(disjuncts)),
second_expr: None,
},
// Since the getter returns a string and not an AstNode, it won't be part of getAFieldOrChild:
None,
)
}
node_types::FieldTypeInfo::Single(_) | node_types::FieldTypeInfo::Multiple { .. } => {
(get_value, Some(get_value_any_index))
}
};
let qldoc = match &field.name {
Some(name) => format!("Gets the node corresponding to the field `{}`.", name),
None => {
if formal_parameters.is_empty() {
"Gets the child of this node.".to_owned()
} else {
"Gets the `i`th child of this node.".to_owned()
}
}
};
(
ql::Predicate {
qldoc: Some(qldoc),
name: &field.getter_name,
overridden: false,
is_final: true,
return_type,
formal_parameters,
body,
},
optional_expr,
)
}
/// Converts the given node types into CodeQL classes wrapping the dbscheme.
pub fn convert_nodes(nodes: &node_types::NodeTypeMap) -> Vec<ql::TopLevel> {
let mut classes: Vec<ql::TopLevel> = Vec::new();
let mut token_kinds = BTreeSet::new();
for (type_name, node) in nodes {
if let node_types::EntryKind::Token { .. } = &node.kind {
if type_name.named {
token_kinds.insert(&type_name.kind);
}
}
}
for (type_name, node) in nodes {
match &node.kind {
node_types::EntryKind::Token { kind_id: _ } => {
if type_name.named {
let get_a_primary_ql_class =
create_get_a_primary_ql_class(&node.ql_class_name, true);
let mut supertypes: BTreeSet<ql::Type> = BTreeSet::new();
supertypes.insert(ql::Type::At(&node.dbscheme_name));
supertypes.insert(ql::Type::Normal("Token"));
classes.push(ql::TopLevel::Class(ql::Class {
qldoc: Some(format!("A class representing `{}` tokens.", type_name.kind)),
name: &node.ql_class_name,
is_abstract: false,
supertypes,
characteristic_predicate: None,
predicates: vec![get_a_primary_ql_class],
}));
}
}
node_types::EntryKind::Union { members: _ } => {
// It's a tree-sitter supertype node, so we're wrapping a dbscheme
// union type.
classes.push(ql::TopLevel::Class(ql::Class {
qldoc: None,
name: &node.ql_class_name,
is_abstract: false,
supertypes: vec![
ql::Type::At(&node.dbscheme_name),
ql::Type::Normal("AstNode"),
]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![],
}));
}
node_types::EntryKind::Table {
name: main_table_name,
fields,
} => {
if fields.is_empty() {
panic!("Encountered node '{}' with no fields", type_name.kind);
}
// Count how many columns there will be in the main table. There
// will be one for the id, plus one for each field that's stored
// as a column.
let main_table_arity = 1 + fields
.iter()
.filter(|&f| matches!(f.storage, node_types::Storage::Column { .. }))
.count();
let main_class_name = &node.ql_class_name;
let mut main_class = ql::Class {
qldoc: Some(format!("A class representing `{}` nodes.", type_name.kind)),
name: main_class_name,
is_abstract: false,
supertypes: vec![
ql::Type::At(&node.dbscheme_name),
ql::Type::Normal("AstNode"),
]
.into_iter()
.collect(),
characteristic_predicate: None,
predicates: vec![create_get_a_primary_ql_class(main_class_name, true)],
};
let mut main_table_column_index: usize = 0;
let mut get_child_exprs: Vec<ql::Expression> = Vec::new();
// Iterate through the fields, creating:
// - classes to wrap union types if fields need them,
// - predicates to access the fields,
// - the QL expressions to access the fields that will be part of getAFieldOrChild.
for field in fields {
let (get_pred, get_child_expr) = create_field_getters(
main_table_name,
main_table_arity,
&mut main_table_column_index,
field,
nodes,
);
main_class.predicates.push(get_pred);
if let Some(get_child_expr) = get_child_expr {
get_child_exprs.push(get_child_expr)
}
}
main_class.predicates.push(ql::Predicate {
qldoc: Some(String::from("Gets a field or child node of this node.")),
name: "getAFieldOrChild",
overridden: true,
is_final: true,
return_type: Some(ql::Type::Normal("AstNode")),
formal_parameters: vec![],
body: ql::Expression::Or(get_child_exprs),
});
classes.push(ql::TopLevel::Class(main_class));
}
}
}
classes
}

View File

@@ -1,6 +0,0 @@
pub mod diagnostics;
pub mod extractor;
pub mod file_paths;
pub mod generator;
pub mod node_types;
pub mod trap;

View File

@@ -1,449 +0,0 @@
use serde::Deserialize;
use std::collections::BTreeMap;
use std::collections::BTreeSet as Set;
use std::fs;
use std::path::Path;
/// A lookup table from TypeName to Entry.
pub type NodeTypeMap = BTreeMap<TypeName, Entry>;
#[derive(Debug)]
pub struct Entry {
pub dbscheme_name: String,
pub ql_class_name: String,
pub kind: EntryKind,
}
#[derive(Debug)]
pub enum EntryKind {
Union { members: Set<TypeName> },
Table { name: String, fields: Vec<Field> },
Token { kind_id: usize },
}
#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
pub struct TypeName {
pub kind: String,
pub named: bool,
}
#[derive(Debug)]
pub enum FieldTypeInfo {
/// The field has a single type.
Single(TypeName),
/// The field can take one of several types, so we also provide the name of
/// the database union type that wraps them, and the corresponding QL class
/// name.
Multiple {
types: Set<TypeName>,
dbscheme_union: String,
ql_class: String,
},
/// The field can be one of several tokens, so the db type will be an `int`
/// with a `case @foo.kind` for each possibility.
ReservedWordInt(BTreeMap<String, (usize, String)>),
}
#[derive(Debug)]
pub struct Field {
pub parent: TypeName,
pub type_info: FieldTypeInfo,
/// The name of the field or None for the anonymous 'children'
/// entry from node_types.json
pub name: Option<String>,
/// The name of the predicate to get this field.
pub getter_name: String,
pub storage: Storage,
}
fn name_for_field_or_child(name: &Option<String>) -> String {
match name {
Some(name) => name.clone(),
None => "child".to_owned(),
}
}
#[derive(Debug)]
pub enum Storage {
/// the field is stored as a column in the parent table
Column { name: String },
/// the field is stored in a link table
Table {
/// the name of the table
name: String,
/// the name of the column for the field in the dbscheme
column_name: String,
/// does it have an associated index column?
has_index: bool,
},
}
impl Storage {
pub fn is_column(&self) -> bool {
matches!(self, Storage::Column { .. })
}
}
pub fn read_node_types(prefix: &str, node_types_path: &Path) -> std::io::Result<NodeTypeMap> {
let file = fs::File::open(node_types_path)?;
let node_types: Vec<NodeInfo> = serde_json::from_reader(file)?;
Ok(convert_nodes(prefix, &node_types))
}
pub fn read_node_types_str(prefix: &str, node_types_json: &str) -> std::io::Result<NodeTypeMap> {
let node_types: Vec<NodeInfo> = serde_json::from_str(node_types_json)?;
Ok(convert_nodes(prefix, &node_types))
}
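// A minimal sketch (hypothetical JSON fragment, not taken from a real grammar)
// of how `read_node_types_str` builds the map: nodes with no fields, children
// or subtypes become token entries keyed by their (kind, named) pair.
#[test]
fn read_node_types_str_sketch() {
    let json = r#"[
        {"type": "comment", "named": true},
        {"type": "+", "named": false}
    ]"#;
    let nodes = read_node_types_str("ruby", json).unwrap();
    let comment = nodes
        .get(&TypeName {
            kind: "comment".to_owned(),
            named: true,
        })
        .unwrap();
    assert!(matches!(comment.kind, EntryKind::Token { .. }));
    assert_eq!("ruby_token_comment", comment.dbscheme_name);
}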
fn convert_type(node_type: &NodeType) -> TypeName {
TypeName {
kind: node_type.kind.to_string(),
named: node_type.named,
}
}
fn convert_types(node_types: &[NodeType]) -> Set<TypeName> {
node_types.iter().map(convert_type).collect()
}
pub fn convert_nodes(prefix: &str, nodes: &[NodeInfo]) -> NodeTypeMap {
let mut entries = NodeTypeMap::new();
let mut token_kinds = Set::new();
// First, find all the token kinds
for node in nodes {
if node.subtypes.is_none()
&& node.fields.as_ref().map_or(0, |x| x.len()) == 0
&& node.children.is_none()
{
let type_name = TypeName {
kind: node.kind.clone(),
named: node.named,
};
token_kinds.insert(type_name);
}
}
for node in nodes {
let flattened_name = &node_type_name(&node.kind, node.named);
let dbscheme_name = escape_name(flattened_name);
let ql_class_name = dbscheme_name_to_class_name(&dbscheme_name);
let dbscheme_name = format!("{}_{}", prefix, &dbscheme_name);
if let Some(subtypes) = &node.subtypes {
// It's a tree-sitter supertype node, for which we create a union
// type.
entries.insert(
TypeName {
kind: node.kind.clone(),
named: node.named,
},
Entry {
dbscheme_name,
ql_class_name,
kind: EntryKind::Union {
members: convert_types(subtypes),
},
},
);
} else if node.fields.as_ref().map_or(0, |x| x.len()) == 0 && node.children.is_none() {
// Token kind, handled above.
} else {
// It's a product type, defined by a table.
let type_name = TypeName {
kind: node.kind.clone(),
named: node.named,
};
let table_name = escape_name(&(format!("{}_def", &flattened_name)));
let table_name = format!("{}_{}", prefix, &table_name);
let mut fields = Vec::new();
// If the type also has fields or children, then we create either
// auxiliary tables or columns in the defining table for them.
if let Some(node_fields) = &node.fields {
for (field_name, field_info) in node_fields {
add_field(
prefix,
&type_name,
Some(field_name.to_string()),
field_info,
&mut fields,
&token_kinds,
);
}
}
if let Some(children) = &node.children {
// Treat children as if they were a field called 'child'.
add_field(
prefix,
&type_name,
None,
children,
&mut fields,
&token_kinds,
);
}
entries.insert(
type_name,
Entry {
dbscheme_name,
ql_class_name,
kind: EntryKind::Table {
name: table_name,
fields,
},
},
);
}
}
let mut counter = 0;
for type_name in token_kinds {
let entry = if type_name.named {
counter += 1;
let unprefixed_name = node_type_name(&type_name.kind, true);
Entry {
dbscheme_name: escape_name(&format!("{}_token_{}", &prefix, &unprefixed_name)),
ql_class_name: dbscheme_name_to_class_name(&escape_name(&unprefixed_name)),
kind: EntryKind::Token { kind_id: counter },
}
} else {
Entry {
dbscheme_name: format!("{}_reserved_word", &prefix),
ql_class_name: "ReservedWord".to_owned(),
kind: EntryKind::Token { kind_id: 0 },
}
};
entries.insert(type_name, entry);
}
entries
}
fn add_field(
prefix: &str,
parent_type_name: &TypeName,
field_name: Option<String>,
field_info: &FieldInfo,
fields: &mut Vec<Field>,
token_kinds: &Set<TypeName>,
) {
let parent_flattened_name = node_type_name(&parent_type_name.kind, parent_type_name.named);
let column_name = escape_name(&name_for_field_or_child(&field_name));
let storage = if !field_info.multiple && field_info.required {
// This field must appear exactly once, so we add it as
// a column to the main table for the node type.
Storage::Column { name: column_name }
} else {
// Put the field in an auxiliary table.
let has_index = field_info.multiple;
let field_table_name = escape_name(&format!(
"{}_{}_{}",
&prefix,
parent_flattened_name,
&name_for_field_or_child(&field_name)
));
Storage::Table {
has_index,
name: field_table_name,
column_name,
}
};
let converted_types = convert_types(&field_info.types);
let type_info = if storage.is_column()
&& field_info
.types
.iter()
.all(|t| !t.named && token_kinds.contains(&convert_type(t)))
{
// All possible types for this field are reserved words. The db
// representation will be an `int` with a `case @foo.field = ...` to
// enumerate the possible values.
let mut field_token_ints: BTreeMap<String, (usize, String)> = BTreeMap::new();
for (counter, t) in converted_types.into_iter().enumerate() {
let dbscheme_variant_name =
escape_name(&format!("{}_{}_{}", &prefix, parent_flattened_name, t.kind));
field_token_ints.insert(t.kind.to_owned(), (counter, dbscheme_variant_name));
}
FieldTypeInfo::ReservedWordInt(field_token_ints)
} else if field_info.types.len() == 1 {
FieldTypeInfo::Single(converted_types.into_iter().next().unwrap())
} else {
// The dbscheme type for this field will be a union. In QL, it'll just be AstNode.
FieldTypeInfo::Multiple {
types: converted_types,
dbscheme_union: format!(
"{}_{}_{}_type",
&prefix,
&parent_flattened_name,
&name_for_field_or_child(&field_name)
),
ql_class: "AstNode".to_owned(),
}
};
let getter_name = format!(
"get{}",
dbscheme_name_to_class_name(&escape_name(&name_for_field_or_child(&field_name)))
);
fields.push(Field {
parent: TypeName {
kind: parent_type_name.kind.to_string(),
named: parent_type_name.named,
},
type_info,
name: field_name,
getter_name,
storage,
});
}
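// A small sketch (hypothetical node and field names) of the storage decision
// above: a required, single-valued field lands in a column of the parent
// table, while a repeated field gets its own link table with an index column.
#[test]
fn add_field_storage_sketch() {
    let parent = TypeName {
        kind: "call".to_owned(),
        named: true,
    };
    let single = FieldInfo {
        multiple: false,
        required: true,
        types: vec![NodeType {
            kind: "identifier".to_owned(),
            named: true,
        }],
    };
    let repeated = FieldInfo {
        multiple: true,
        required: false,
        types: vec![NodeType {
            kind: "argument".to_owned(),
            named: true,
        }],
    };
    let mut fields = Vec::new();
    add_field("ruby", &parent, Some("method".to_owned()), &single, &mut fields, &Set::new());
    add_field("ruby", &parent, Some("arguments".to_owned()), &repeated, &mut fields, &Set::new());
    assert!(matches!(fields[0].storage, Storage::Column { .. }));
    assert!(matches!(
        fields[1].storage,
        Storage::Table { has_index: true, .. }
    ));
}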
#[derive(Deserialize)]
pub struct NodeInfo {
#[serde(rename = "type")]
pub kind: String,
pub named: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<BTreeMap<String, FieldInfo>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub children: Option<FieldInfo>,
#[serde(skip_serializing_if = "Option::is_none")]
pub subtypes: Option<Vec<NodeType>>,
}
#[derive(Deserialize)]
pub struct NodeType {
#[serde(rename = "type")]
pub kind: String,
pub named: bool,
}
#[derive(Deserialize)]
pub struct FieldInfo {
pub multiple: bool,
pub required: bool,
pub types: Vec<NodeType>,
}
/// Given a tree-sitter node type's (kind, named) pair, returns a single string
/// representing the (unescaped) name we'll use to refer to the corresponding
/// QL type.
fn node_type_name(kind: &str, named: bool) -> String {
if named {
kind.to_string()
} else {
format!("{}_unnamed", kind)
}
}
const RESERVED_KEYWORDS: [&str; 14] = [
"boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype",
"type", "unique", "varchar",
];
/// Returns a string that's a copy of `name` but suitably escaped to be a valid
/// QL identifier.
fn escape_name(name: &str) -> String {
let mut result = String::new();
// If there's a leading underscore, replace it with 'underscore_'.
if let Some(c) = name.chars().next() {
if c == '_' {
result.push_str("underscore");
}
}
for c in name.chars() {
match c {
'{' => result.push_str("lbrace"),
'}' => result.push_str("rbrace"),
'<' => result.push_str("langle"),
'>' => result.push_str("rangle"),
'[' => result.push_str("lbracket"),
']' => result.push_str("rbracket"),
'(' => result.push_str("lparen"),
')' => result.push_str("rparen"),
'|' => result.push_str("pipe"),
'=' => result.push_str("equal"),
'~' => result.push_str("tilde"),
'?' => result.push_str("question"),
'`' => result.push_str("backtick"),
'^' => result.push_str("caret"),
'!' => result.push_str("bang"),
'#' => result.push_str("hash"),
'%' => result.push_str("percent"),
'&' => result.push_str("ampersand"),
'.' => result.push_str("dot"),
',' => result.push_str("comma"),
'/' => result.push_str("slash"),
':' => result.push_str("colon"),
';' => result.push_str("semicolon"),
'"' => result.push_str("dquote"),
'*' => result.push_str("star"),
'+' => result.push_str("plus"),
'-' => result.push_str("minus"),
'@' => result.push_str("at"),
_ if c.is_uppercase() => {
result.push('_');
result.push_str(&c.to_lowercase().to_string())
}
_ => result.push(c),
}
}
for &keyword in &RESERVED_KEYWORDS {
if result == keyword {
result.push_str("__");
break;
}
}
result
}
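// A few concrete cases of the escaping rules above (a sketch derived from the
// match arms, not part of the original test suite): punctuation is spelled
// out, uppercase letters gain a '_' prefix, and dbscheme keywords get "__".
#[test]
fn escape_name_sketch() {
    assert_eq!("lbracerbrace", escape_name("{}"));
    assert_eq!("underscore_foo", escape_name("_foo"));
    assert_eq!("_foo_bar", escape_name("FooBar"));
    assert_eq!("type__", escape_name("type"));
}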
pub fn to_snake_case(word: &str) -> String {
let mut prev_upper = true;
let mut result = String::new();
for c in word.chars() {
if c.is_uppercase() {
if !prev_upper {
result.push('_')
}
prev_upper = true;
result.push(c.to_ascii_lowercase());
} else {
prev_upper = false;
result.push(c);
}
}
result
}
/// Given a valid dbscheme name (i.e. in snake case), produces the equivalent QL
/// name (i.e. in CamelCase). For example, "foo_bar_baz" becomes "FooBarBaz".
fn dbscheme_name_to_class_name(dbscheme_name: &str) -> String {
fn to_title_case(word: &str) -> String {
let mut first = true;
let mut result = String::new();
for c in word.chars() {
if first {
first = false;
result.push(c.to_ascii_uppercase());
} else {
result.push(c);
}
}
result
}
dbscheme_name
.split('_')
.map(to_title_case)
.collect::<Vec<String>>()
.join("")
}
#[test]
fn to_snake_case_test() {
assert_eq!("ruby", to_snake_case("Ruby"));
assert_eq!("erb", to_snake_case("ERB"));
assert_eq!("embedded_template", to_snake_case("EmbeddedTemplate"));
}

View File

@@ -1,272 +0,0 @@
use std::borrow::Cow;
use std::fmt;
use std::io::{BufWriter, Write};
use std::path::Path;
use flate2::write::GzEncoder;
pub struct Writer {
/// The accumulated trap entries
trap_output: Vec<Entry>,
/// A counter for generating fresh labels
counter: u32,
/// cache of global keys
global_keys: std::collections::HashMap<String, Label>,
}
impl Writer {
pub fn new() -> Writer {
Writer {
counter: 0,
trap_output: Vec::new(),
global_keys: std::collections::HashMap::new(),
}
}
pub fn fresh_id(&mut self) -> Label {
let label = Label(self.counter);
self.counter += 1;
self.trap_output.push(Entry::FreshId(label));
label
}
/// Gets a label that will hold the unique ID of the passed string at import time.
/// This can be used for incrementally importable TRAP files -- use globally unique
/// strings to compute a unique ID for table tuples.
///
/// Note: You probably want to make sure that the key strings that you use are disjoint
/// for disjoint column types; the standard way of doing this is to prefix (or append)
/// the column type name to the ID. Thus, you might identify methods in Java by the
/// full ID "methods_com.method.package.DeclaringClass.method(argumentList)".
pub fn global_id(&mut self, key: &str) -> (Label, bool) {
if let Some(label) = self.global_keys.get(key) {
return (*label, false);
}
let label = Label(self.counter);
self.counter += 1;
self.global_keys.insert(key.to_owned(), label);
self.trap_output
.push(Entry::MapLabelToKey(label, key.to_owned()));
(label, true)
}
pub fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
self.trap_output
.push(Entry::GenericTuple(table_name.to_owned(), args))
}
pub fn comment(&mut self, text: String) {
self.trap_output.push(Entry::Comment(text));
}
pub fn write_to_file(&self, path: &Path, compression: Compression) -> std::io::Result<()> {
let trap_file = std::fs::File::create(path)?;
match compression {
Compression::None => {
let mut trap_file = BufWriter::new(trap_file);
self.write_trap_entries(&mut trap_file)
}
Compression::Gzip => {
let trap_file = GzEncoder::new(trap_file, flate2::Compression::fast());
let mut trap_file = BufWriter::new(trap_file);
self.write_trap_entries(&mut trap_file)
}
}
}
fn write_trap_entries<W: Write>(&self, file: &mut W) -> std::io::Result<()> {
for trap_entry in &self.trap_output {
writeln!(file, "{}", trap_entry)?;
}
std::io::Result::Ok(())
}
}
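// A minimal usage sketch (hypothetical table name and path, not part of the
// original extractor): `global_id` interns a key once, so asking for the same
// key again reuses the existing label instead of emitting a new entry.
#[test]
fn global_id_usage_sketch() {
    let mut writer = Writer::new();
    let (file_label, is_new) = writer.global_id(&full_id_for_file("/src/main.rb"));
    assert!(is_new);
    writer.add_tuple(
        "files",
        vec![
            Arg::Label(file_label),
            Arg::String("/src/main.rb".to_owned()),
        ],
    );
    let (same_label, is_new) = writer.global_id(&full_id_for_file("/src/main.rb"));
    assert!(!is_new);
    assert_eq!(format!("{}", file_label), format!("{}", same_label));
}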
pub enum Entry {
/// Maps the label to a fresh id, e.g. `#123=*`.
FreshId(Label),
/// Maps the label to a key, e.g. `#7=@"foo"`.
MapLabelToKey(Label, String),
/// foo_bar(arg*)
GenericTuple(String, Vec<Arg>),
Comment(String),
}
impl fmt::Display for Entry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Entry::FreshId(label) => write!(f, "{}=*", label),
Entry::MapLabelToKey(label, key) => {
write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\""))
}
Entry::GenericTuple(name, args) => {
write!(f, "{}(", name)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ",")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
Entry::Comment(line) => write!(f, "// {}", line),
}
}
}
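// A sketch of the textual TRAP forms produced above (labels render in
// hexadecimal, and embedded quotes in keys and strings are doubled):
#[test]
fn entry_display_sketch() {
    assert_eq!("#a=*", format!("{}", Entry::FreshId(Label(10))));
    assert_eq!(
        "#1=@\"say \"\"hi\"\"\"",
        format!("{}", Entry::MapLabelToKey(Label(1), "say \"hi\"".to_owned()))
    );
    let tuple = Entry::GenericTuple(
        "files".to_owned(),
        vec![Arg::Label(Label(1)), Arg::String("foo.rb".to_owned())],
    );
    assert_eq!("files(#1,\"foo.rb\")", format!("{}", tuple));
}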
#[derive(Debug, Copy, Clone)]
// Identifiers of the form #0, #1, ... (written in hexadecimal by the `Display` impl).
pub struct Label(u32);
impl fmt::Display for Label {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "#{:x}", self.0)
}
}
// An untyped argument to a trap `Entry`.
#[derive(Debug, Clone)]
pub enum Arg {
Label(Label),
Int(usize),
String(String),
}
const MAX_STRLEN: usize = 1048576;
impl fmt::Display for Arg {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Arg::Label(x) => write!(f, "{}", x),
Arg::Int(x) => write!(f, "{}", x),
Arg::String(x) => write!(
f,
"\"{}\"",
limit_string(x, MAX_STRLEN).replace("\"", "\"\"")
),
}
}
}
pub struct Program(Vec<Entry>);
impl fmt::Display for Program {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut text = String::new();
for trap_entry in &self.0 {
text.push_str(&format!("{}\n", trap_entry));
}
write!(f, "{}", text)
}
}
pub fn full_id_for_file(normalized_path: &str) -> String {
format!("{};sourcefile", escape_key(normalized_path))
}
pub fn full_id_for_folder(normalized_path: &str) -> String {
format!("{};folder", escape_key(normalized_path))
}
/// Escapes a string for use in a TRAP key, by replacing special characters with
/// HTML entities.
fn escape_key<'a, S: Into<Cow<'a, str>>>(key: S) -> Cow<'a, str> {
fn needs_escaping(c: char) -> bool {
matches!(c, '&' | '{' | '}' | '"' | '@' | '#')
}
let key = key.into();
if key.contains(needs_escaping) {
let mut escaped = String::with_capacity(2 * key.len());
for c in key.chars() {
match c {
'&' => escaped.push_str("&amp;"),
'{' => escaped.push_str("&lbrace;"),
'}' => escaped.push_str("&rbrace;"),
'"' => escaped.push_str("&quot;"),
'@' => escaped.push_str("&commat;"),
'#' => escaped.push_str("&num;"),
_ => escaped.push(c),
}
}
Cow::Owned(escaped)
} else {
key
}
}
/// Limit the length (in bytes) of a string. If the string's length in bytes is
/// less than or equal to the limit then the entire string is returned. Otherwise
/// the string is sliced at the provided limit. If there is a multi-byte character
/// at the limit then the returned slice will be slightly shorter than the limit to
/// avoid splitting that multi-byte character.
fn limit_string(string: &str, max_size: usize) -> &str {
if string.len() <= max_size {
return string;
}
let p = string.as_bytes();
let mut index = max_size;
// We want to clip the string at [max_size]; however, the character at that position
// may span several bytes. We need to find the first byte of the character. In UTF-8
// encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte.
// Therefore we decrement the index as long as there are bytes matching this pattern.
// This ensures we cut the string at the border between one character and another.
while index > 0 && (p[index] & 0b11000000) == 0b10000000 {
index -= 1;
}
&string[0..index]
}
#[derive(Clone, Copy)]
pub enum Compression {
None,
Gzip,
}
impl Compression {
pub fn from_env(var_name: &str) -> Result<Compression, String> {
match std::env::var(var_name) {
Ok(method) => match Compression::from_string(&method) {
Some(c) => Ok(c),
None => Err(format!("Unknown compression method '{}'", &method)),
},
// Default compression method if the env var isn't set:
Err(_) => Ok(Compression::Gzip),
}
}
pub fn from_string(s: &str) -> Option<Compression> {
match s.to_lowercase().as_ref() {
"none" => Some(Compression::None),
"gzip" => Some(Compression::Gzip),
_ => None,
}
}
pub fn extension(&self) -> &str {
match self {
Compression::None => "trap",
Compression::Gzip => "trap.gz",
}
}
}
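// A small sketch of the string parsing above: matching is case-insensitive,
// and unrecognised values are rejected rather than silently defaulted.
#[test]
fn compression_from_string_sketch() {
    assert!(matches!(Compression::from_string("GZIP"), Some(Compression::Gzip)));
    assert!(matches!(Compression::from_string("none"), Some(Compression::None)));
    assert!(Compression::from_string("zstd").is_none());
}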
#[test]
fn limit_string_test() {
assert_eq!("hello", limit_string(&"hello world".to_owned(), 5));
assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6));
assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5));
}
#[test]
fn escape_key_test() {
assert_eq!("foo!", escape_key("foo!"));
assert_eq!("foo&lbrace;&rbrace;", escape_key("foo{}"));
assert_eq!("&lbrace;&rbrace;", escape_key("{}"));
assert_eq!("", escape_key(""));
assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb"));
assert_eq!(
"/path/to/foo&amp;&lbrace;&rbrace;&quot;&commat;&num;.rb",
escape_key("/path/to/foo&{}\"@#.rb")
);
}

View File

@@ -14,7 +14,15 @@ else
fi
(cd extractor && "$CARGO" build --release)
extractor/target/release/generator --dbscheme ql/lib/ruby.dbscheme --library ql/lib/codeql/ruby/ast/internal/TreeSitter.qll
# If building via cross, the binaries will be in extractor/target/<triple>/release
# If building via cargo, the binaries will be in extractor/target/release
BIN_DIR=extractor/target/release
if [[ "$CARGO" == "cross" ]]; then
BIN_DIR=extractor/target/x86_64-unknown-linux-gnu/release
fi
"$BIN_DIR/generator" --dbscheme ql/lib/ruby.dbscheme --library ql/lib/codeql/ruby/ast/internal/TreeSitter.qll
codeql query format -i ql/lib/codeql/ruby/ast/internal/TreeSitter.qll
@@ -22,5 +30,5 @@ rm -rf extractor-pack
mkdir -p extractor-pack
cp -r codeql-extractor.yml downgrades tools ql/lib/ruby.dbscheme ql/lib/ruby.dbscheme.stats extractor-pack/
mkdir -p extractor-pack/tools/${platform}
cp extractor/target/release/extractor extractor-pack/tools/${platform}/extractor
cp extractor/target/release/autobuilder extractor-pack/tools/${platform}/autobuilder
cp "$BIN_DIR/extractor" extractor-pack/tools/${platform}/extractor
cp "$BIN_DIR/autobuilder" extractor-pack/tools/${platform}/autobuilder