Ruby: generate overlay discard predicates

This commit is contained in:
Nick Rolfe
2025-06-06 18:06:13 +01:00
parent 1bbba2f664
commit a9ddf0026b
5 changed files with 287 additions and 3 deletions

View File

@@ -2,7 +2,6 @@ use clap::Args;
use codeql_extractor::file_paths::PathTransformer; use codeql_extractor::file_paths::PathTransformer;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use rayon::prelude::*; use rayon::prelude::*;
use serde_json;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashSet; use std::collections::HashSet;
use std::fs; use std::fs;

View File

@@ -5,6 +5,10 @@
import codeql.Locations as L import codeql.Locations as L
/**
 * Holds if the database is an overlay, that is, if the `databaseMetadata`
 * relation contains the entry `("isOverlay", "true")`.
 */
overlay[local]
private predicate isOverlay() { databaseMetadata("isOverlay", "true") }
module Ruby { module Ruby {
/** The base class for all AST nodes */ /** The base class for all AST nodes */
class AstNode extends @ruby_ast_node { class AstNode extends @ruby_ast_node {
@@ -48,6 +52,30 @@ module Ruby {
final override string getAPrimaryQlClass() { result = "ReservedWord" } final override string getAPrimaryQlClass() { result = "ReservedWord" }
} }
/**
 * Gets the file containing the given `node`.
 *
 * This is the file that `locations_default` records for the location which
 * `ruby_ast_node_location` associates with `node`.
 */
overlay[local]
private @file getNodeFile(@ruby_ast_node node) {
exists(@location_default loc | ruby_ast_node_location(node, loc) |
locations_default(loc, result, _, _, _, _)
)
}
/**
 * Holds if `file` was extracted as part of the overlay database.
 *
 * Concretely: the database is an overlay (`isOverlay()`) and `file` is the
 * file of at least one Ruby AST node (`getNodeFile(_)`).
 */
overlay[local]
private predicate discardFile(@file file) { isOverlay() and file = getNodeFile(_) }
/**
 * Holds if `node` is in the `file` and is part of the overlay base database.
 *
 * Concretely: the database is not an overlay (`not isOverlay()`) and `file`
 * is the file reported by `getNodeFile(node)`.
 */
overlay[local]
private predicate discardableAstNode(@file file, @ruby_ast_node node) {
not isOverlay() and file = getNodeFile(node)
}
/**
 * Holds if `node` should be discarded, because it is part of the overlay base
 * and is in a file that was also extracted as part of the overlay database.
 */
// NOTE(review): `discardableAstNode` requires `not isOverlay()` while
// `discardFile` requires `isOverlay()`. This conjunction presumably relies on
// `overlay[local]` predicates being evaluated separately against the base and
// the overlay data - confirm against the CodeQL overlay documentation.
overlay[discard_entity]
private predicate discardAstNode(@ruby_ast_node node) {
exists(@file file | discardableAstNode(file, node) and discardFile(file))
}
class UnderscoreArg extends @ruby_underscore_arg, AstNode { } class UnderscoreArg extends @ruby_underscore_arg, AstNode { }
class UnderscoreCallOperator extends @ruby_underscore_call_operator, AstNode { } class UnderscoreCallOperator extends @ruby_underscore_call_operator, AstNode { }
@@ -1970,6 +1998,30 @@ module Erb {
final override string getAPrimaryQlClass() { result = "ReservedWord" } final override string getAPrimaryQlClass() { result = "ReservedWord" }
} }
/**
 * Gets the file containing the given `node`.
 *
 * This is the file that `locations_default` records for the location which
 * `erb_ast_node_location` associates with `node`.
 */
overlay[local]
private @file getNodeFile(@erb_ast_node node) {
exists(@location_default loc | erb_ast_node_location(node, loc) |
locations_default(loc, result, _, _, _, _)
)
}
/**
 * Holds if `file` was extracted as part of the overlay database.
 *
 * Concretely: the database is an overlay (`isOverlay()`) and `file` is the
 * file of at least one ERB AST node (`getNodeFile(_)`).
 */
overlay[local]
private predicate discardFile(@file file) { isOverlay() and file = getNodeFile(_) }
/**
 * Holds if `node` is in the `file` and is part of the overlay base database.
 *
 * Concretely: the database is not an overlay (`not isOverlay()`) and `file`
 * is the file reported by `getNodeFile(node)`.
 */
overlay[local]
private predicate discardableAstNode(@file file, @erb_ast_node node) {
not isOverlay() and file = getNodeFile(node)
}
/**
 * Holds if `node` should be discarded, because it is part of the overlay base
 * and is in a file that was also extracted as part of the overlay database.
 */
// NOTE(review): `discardableAstNode` requires `not isOverlay()` while
// `discardFile` requires `isOverlay()`. This conjunction presumably relies on
// `overlay[local]` predicates being evaluated separately against the base and
// the overlay data - confirm against the CodeQL overlay documentation.
overlay[discard_entity]
private predicate discardAstNode(@erb_ast_node node) {
exists(@file file | discardableAstNode(file, node) and discardFile(file))
}
/** A class representing `code` tokens. */ /** A class representing `code` tokens. */
class Code extends @erb_token_code, Token { class Code extends @erb_token_code, Token {
/** Gets the name of the primary QL class for this element. */ /** Gets the name of the primary QL class for this element. */

View File

@@ -17,7 +17,7 @@ pub fn generate(
languages: Vec<language::Language>, languages: Vec<language::Language>,
dbscheme_path: PathBuf, dbscheme_path: PathBuf,
ql_library_path: PathBuf, ql_library_path: PathBuf,
add_metadata_relation: bool, overlay_support: bool,
) -> std::io::Result<()> { ) -> std::io::Result<()> {
let dbscheme_file = File::create(dbscheme_path).map_err(|e| { let dbscheme_file = File::create(dbscheme_path).map_err(|e| {
tracing::error!("Failed to create dbscheme file: {}", e); tracing::error!("Failed to create dbscheme file: {}", e);
@@ -35,7 +35,7 @@ pub fn generate(
// Eventually all languages will have the metadata relation (for overlay support), at which // Eventually all languages will have the metadata relation (for overlay support), at which
// point this could be moved to prefix.dbscheme. // point this could be moved to prefix.dbscheme.
if add_metadata_relation { if overlay_support {
writeln!(dbscheme_writer, "/*- Database metadata -*/",)?; writeln!(dbscheme_writer, "/*- Database metadata -*/",)?;
dbscheme::write( dbscheme::write(
&mut dbscheme_writer, &mut dbscheme_writer,
@@ -60,6 +60,15 @@ pub fn generate(
})], })],
)?; )?;
if overlay_support {
ql::write(
&mut ql_writer,
&[ql::TopLevel::Predicate(
ql_gen::create_is_overlay_predicate(),
)],
)?;
}
for language in languages { for language in languages {
let prefix = node_types::to_snake_case(&language.name); let prefix = node_types::to_snake_case(&language.name);
let ast_node_name = format!("{}_ast_node", &prefix); let ast_node_name = format!("{}_ast_node", &prefix);
@@ -103,6 +112,22 @@ pub fn generate(
ql::TopLevel::Class(ql_gen::create_token_class(&token_name, &tokeninfo_name)), ql::TopLevel::Class(ql_gen::create_token_class(&token_name, &tokeninfo_name)),
ql::TopLevel::Class(ql_gen::create_reserved_word_class(&reserved_word_name)), ql::TopLevel::Class(ql_gen::create_reserved_word_class(&reserved_word_name)),
]; ];
if overlay_support {
body.push(ql::TopLevel::Predicate(
ql_gen::create_get_node_file_predicate(&ast_node_name, &node_location_table_name),
));
body.push(ql::TopLevel::Predicate(
ql_gen::create_discard_file_predicate(),
));
body.push(ql::TopLevel::Predicate(
ql_gen::create_discardable_ast_node_predicate(&ast_node_name),
));
body.push(ql::TopLevel::Predicate(
ql_gen::create_discard_ast_node_predicate(&ast_node_name),
));
}
body.append(&mut ql_gen::convert_nodes(&nodes)); body.append(&mut ql_gen::convert_nodes(&nodes));
ql::write( ql::write(
&mut ql_writer, &mut ql_writer,

View File

@@ -6,6 +6,7 @@ pub enum TopLevel<'a> {
Class(Class<'a>), Class(Class<'a>),
Import(Import<'a>), Import(Import<'a>),
Module(Module<'a>), Module(Module<'a>),
Predicate(Predicate<'a>),
} }
impl fmt::Display for TopLevel<'_> { impl fmt::Display for TopLevel<'_> {
@@ -14,6 +15,7 @@ impl fmt::Display for TopLevel<'_> {
TopLevel::Import(imp) => write!(f, "{}", imp), TopLevel::Import(imp) => write!(f, "{}", imp),
TopLevel::Class(cls) => write!(f, "{}", cls), TopLevel::Class(cls) => write!(f, "{}", cls),
TopLevel::Module(m) => write!(f, "{}", m), TopLevel::Module(m) => write!(f, "{}", m),
TopLevel::Predicate(pred) => write!(f, "{}", pred),
} }
} }
} }
@@ -68,10 +70,12 @@ impl fmt::Display for Class<'_> {
qldoc: None, qldoc: None,
name: self.name, name: self.name,
overridden: false, overridden: false,
is_private: false,
is_final: false, is_final: false,
return_type: None, return_type: None,
formal_parameters: vec![], formal_parameters: vec![],
body: charpred.clone(), body: charpred.clone(),
overlay: None,
} }
)?; )?;
} }
@@ -150,6 +154,7 @@ pub enum Expression<'a> {
expr: Box<Expression<'a>>, expr: Box<Expression<'a>>,
second_expr: Option<Box<Expression<'a>>>, second_expr: Option<Box<Expression<'a>>>,
}, },
Negation(Box<Expression<'a>>),
} }
impl fmt::Display for Expression<'_> { impl fmt::Display for Expression<'_> {
@@ -231,19 +236,28 @@ impl fmt::Display for Expression<'_> {
} }
write!(f, ")") write!(f, ")")
} }
Expression::Negation(e) => write!(f, "not ({})", e),
} }
} }
} }
/// The `overlay[...]` annotation that can be attached to a generated QL
/// predicate.
///
/// The enum is fieldless, so it is `Copy`; `Debug` is derived so values can be
/// printed in diagnostics and test failures.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum OverlayAnnotation {
    /// Rendered as `overlay[local]`.
    Local,
    /// Rendered as `overlay[discard_entity]`.
    DiscardEntity,
}
#[derive(Clone, Eq, PartialEq, Hash)] #[derive(Clone, Eq, PartialEq, Hash)]
pub struct Predicate<'a> { pub struct Predicate<'a> {
pub qldoc: Option<String>, pub qldoc: Option<String>,
pub name: &'a str, pub name: &'a str,
pub overridden: bool, pub overridden: bool,
pub is_private: bool,
pub is_final: bool, pub is_final: bool,
pub return_type: Option<Type<'a>>, pub return_type: Option<Type<'a>>,
pub formal_parameters: Vec<FormalParameter<'a>>, pub formal_parameters: Vec<FormalParameter<'a>>,
pub body: Expression<'a>, pub body: Expression<'a>,
pub overlay: Option<OverlayAnnotation>,
} }
impl fmt::Display for Predicate<'_> { impl fmt::Display for Predicate<'_> {
@@ -251,6 +265,17 @@ impl fmt::Display for Predicate<'_> {
if let Some(qldoc) = &self.qldoc { if let Some(qldoc) = &self.qldoc {
write!(f, "/** {} */", qldoc)?; write!(f, "/** {} */", qldoc)?;
} }
if let Some(overlay_annotation) = &self.overlay {
write!(f, "overlay[")?;
match overlay_annotation {
OverlayAnnotation::Local => write!(f, "local")?,
OverlayAnnotation::DiscardEntity => write!(f, "discard_entity")?,
}
write!(f, "] ")?;
}
if self.is_private {
write!(f, "private ")?;
}
if self.is_final { if self.is_final {
write!(f, "final ")?; write!(f, "final ")?;
} }

View File

@@ -16,6 +16,7 @@ pub fn create_ast_node_class<'a>(
)), )),
name: "toString", name: "toString",
overridden: false, overridden: false,
is_private: false,
is_final: false, is_final: false,
return_type: Some(ql::Type::String), return_type: Some(ql::Type::String),
formal_parameters: vec![], formal_parameters: vec![],
@@ -27,11 +28,13 @@ pub fn create_ast_node_class<'a>(
vec![], vec![],
)), )),
), ),
overlay: None,
}; };
let get_location = ql::Predicate { let get_location = ql::Predicate {
name: "getLocation", name: "getLocation",
qldoc: Some(String::from("Gets the location of this element.")), qldoc: Some(String::from("Gets the location of this element.")),
overridden: false, overridden: false,
is_private: false,
is_final: true, is_final: true,
return_type: Some(ql::Type::Normal("L::Location")), return_type: Some(ql::Type::Normal("L::Location")),
formal_parameters: vec![], formal_parameters: vec![],
@@ -39,6 +42,7 @@ pub fn create_ast_node_class<'a>(
node_location_table, node_location_table,
vec![ql::Expression::Var("this"), ql::Expression::Var("result")], vec![ql::Expression::Var("this"), ql::Expression::Var("result")],
), ),
overlay: None,
}; };
let get_a_field_or_child = create_none_predicate( let get_a_field_or_child = create_none_predicate(
Some(String::from("Gets a field or child node of this node.")), Some(String::from("Gets a field or child node of this node.")),
@@ -50,6 +54,7 @@ pub fn create_ast_node_class<'a>(
qldoc: Some(String::from("Gets the parent of this element.")), qldoc: Some(String::from("Gets the parent of this element.")),
name: "getParent", name: "getParent",
overridden: false, overridden: false,
is_private: false,
is_final: true, is_final: true,
return_type: Some(ql::Type::Normal("AstNode")), return_type: Some(ql::Type::Normal("AstNode")),
formal_parameters: vec![], formal_parameters: vec![],
@@ -61,6 +66,7 @@ pub fn create_ast_node_class<'a>(
ql::Expression::Var("_"), ql::Expression::Var("_"),
], ],
), ),
overlay: None,
}; };
let get_parent_index = ql::Predicate { let get_parent_index = ql::Predicate {
qldoc: Some(String::from( qldoc: Some(String::from(
@@ -68,6 +74,7 @@ pub fn create_ast_node_class<'a>(
)), )),
name: "getParentIndex", name: "getParentIndex",
overridden: false, overridden: false,
is_private: false,
is_final: true, is_final: true,
return_type: Some(ql::Type::Int), return_type: Some(ql::Type::Int),
formal_parameters: vec![], formal_parameters: vec![],
@@ -79,6 +86,7 @@ pub fn create_ast_node_class<'a>(
ql::Expression::Var("result"), ql::Expression::Var("result"),
], ],
), ),
overlay: None,
}; };
let get_a_primary_ql_class = ql::Predicate { let get_a_primary_ql_class = ql::Predicate {
qldoc: Some(String::from( qldoc: Some(String::from(
@@ -86,6 +94,7 @@ pub fn create_ast_node_class<'a>(
)), )),
name: "getAPrimaryQlClass", name: "getAPrimaryQlClass",
overridden: false, overridden: false,
is_private: false,
is_final: false, is_final: false,
return_type: Some(ql::Type::String), return_type: Some(ql::Type::String),
formal_parameters: vec![], formal_parameters: vec![],
@@ -93,6 +102,7 @@ pub fn create_ast_node_class<'a>(
Box::new(ql::Expression::Var("result")), Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::String("???")), Box::new(ql::Expression::String("???")),
), ),
overlay: None,
}; };
let get_primary_ql_classes = ql::Predicate { let get_primary_ql_classes = ql::Predicate {
qldoc: Some( qldoc: Some(
@@ -102,6 +112,7 @@ pub fn create_ast_node_class<'a>(
), ),
name: "getPrimaryQlClasses", name: "getPrimaryQlClasses",
overridden: false, overridden: false,
is_private: false,
is_final: false, is_final: false,
return_type: Some(ql::Type::String), return_type: Some(ql::Type::String),
formal_parameters: vec![], formal_parameters: vec![],
@@ -119,6 +130,7 @@ pub fn create_ast_node_class<'a>(
second_expr: Some(Box::new(ql::Expression::String(","))), second_expr: Some(Box::new(ql::Expression::String(","))),
}), }),
), ),
overlay: None,
}; };
ql::Class { ql::Class {
qldoc: Some(String::from("The base class for all AST nodes")), qldoc: Some(String::from("The base class for all AST nodes")),
@@ -144,10 +156,12 @@ pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Cl
qldoc: Some(String::from("Gets the value of this token.")), qldoc: Some(String::from("Gets the value of this token.")),
name: "getValue", name: "getValue",
overridden: false, overridden: false,
is_private: false,
is_final: true, is_final: true,
return_type: Some(ql::Type::String), return_type: Some(ql::Type::String),
formal_parameters: vec![], formal_parameters: vec![],
body: create_get_field_expr_for_column_storage("result", tokeninfo, 1, tokeninfo_arity), body: create_get_field_expr_for_column_storage("result", tokeninfo, 1, tokeninfo_arity),
overlay: None,
}; };
let to_string = ql::Predicate { let to_string = ql::Predicate {
qldoc: Some(String::from( qldoc: Some(String::from(
@@ -155,6 +169,7 @@ pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Cl
)), )),
name: "toString", name: "toString",
overridden: true, overridden: true,
is_private: false,
is_final: true, is_final: true,
return_type: Some(ql::Type::String), return_type: Some(ql::Type::String),
formal_parameters: vec![], formal_parameters: vec![],
@@ -166,6 +181,7 @@ pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Cl
vec![], vec![],
)), )),
), ),
overlay: None,
}; };
ql::Class { ql::Class {
qldoc: Some(String::from("A token.")), qldoc: Some(String::from("A token.")),
@@ -210,10 +226,12 @@ fn create_none_predicate<'a>(
qldoc, qldoc,
name, name,
overridden, overridden,
is_private: false,
is_final: false, is_final: false,
return_type, return_type,
formal_parameters: Vec::new(), formal_parameters: Vec::new(),
body: ql::Expression::Pred("none", vec![]), body: ql::Expression::Pred("none", vec![]),
overlay: None,
} }
} }
@@ -226,6 +244,7 @@ fn create_get_a_primary_ql_class(class_name: &str, is_final: bool) -> ql::Predic
)), )),
name: "getAPrimaryQlClass", name: "getAPrimaryQlClass",
overridden: true, overridden: true,
is_private: false,
is_final, is_final,
return_type: Some(ql::Type::String), return_type: Some(ql::Type::String),
formal_parameters: vec![], formal_parameters: vec![],
@@ -233,6 +252,166 @@ fn create_get_a_primary_ql_class(class_name: &str, is_final: bool) -> ql::Predic
Box::new(ql::Expression::Var("result")), Box::new(ql::Expression::Var("result")),
Box::new(ql::Expression::String(class_name)), Box::new(ql::Expression::String(class_name)),
), ),
overlay: None,
}
}
/// Builds the private, `overlay[local]` QL predicate `isOverlay()`.
///
/// The generated body is the call `databaseMetadata("isOverlay", "true")`.
pub fn create_is_overlay_predicate() -> ql::Predicate<'static> {
    let metadata_key = ql::Expression::String("isOverlay");
    let metadata_value = ql::Expression::String("true");
    let metadata_lookup =
        ql::Expression::Pred("databaseMetadata", vec![metadata_key, metadata_value]);

    ql::Predicate {
        qldoc: Some("Holds if the database is an overlay.".to_owned()),
        name: "isOverlay",
        overridden: false,
        is_private: true,
        is_final: false,
        return_type: None,
        formal_parameters: Vec::new(),
        body: metadata_lookup,
        overlay: Some(ql::OverlayAnnotation::Local),
    }
}
/// Builds the private, `overlay[local]` QL predicate `getNodeFile(node)`,
/// which returns an `@file`.
///
/// * `ast_node_name` - database type (without the leading `@`) of the `node`
///   parameter.
/// * `node_location_table_name` - relation used to look up the location of
///   `node`.
///
/// The generated body is
/// `exists(@location_default loc | <table>(node, loc) |
/// locations_default(loc, result, _, _, _, _))`.
pub fn create_get_node_file_predicate<'a>(
    ast_node_name: &'a str,
    node_location_table_name: &'a str,
) -> ql::Predicate<'a> {
    let node_param = ql::FormalParameter {
        name: "node",
        param_type: ql::Type::At(ast_node_name),
    };
    let loc_var = ql::FormalParameter {
        name: "loc",
        param_type: ql::Type::At("location_default"),
    };

    // Range of the `exists`: binds `loc` to the location of `node`.
    let node_has_location = ql::Expression::Pred(
        node_location_table_name,
        vec![ql::Expression::Var("node"), ql::Expression::Var("loc")],
    );
    // Formula of the `exists`: `result` is the file column of that location.
    let location_in_file = ql::Expression::Pred(
        "locations_default",
        vec![
            ql::Expression::Var("loc"),
            ql::Expression::Var("result"),
            ql::Expression::Var("_"),
            ql::Expression::Var("_"),
            ql::Expression::Var("_"),
            ql::Expression::Var("_"),
        ],
    );
    let body = ql::Expression::Aggregate {
        name: "exists",
        vars: vec![loc_var],
        range: Some(Box::new(node_has_location)),
        expr: Box::new(location_in_file),
        second_expr: None,
    };

    ql::Predicate {
        qldoc: Some("Gets the file containing the given `node`.".to_owned()),
        name: "getNodeFile",
        overridden: false,
        is_private: true,
        is_final: false,
        return_type: Some(ql::Type::At("file")),
        formal_parameters: vec![node_param],
        body,
        overlay: Some(ql::OverlayAnnotation::Local),
    }
}
/// Builds the private, `overlay[local]` QL predicate `discardFile(file)`.
///
/// The generated body is the conjunction of `isOverlay()` and
/// `file = getNodeFile(_)`.
pub fn create_discard_file_predicate<'a>() -> ql::Predicate<'a> {
    let file_param = ql::FormalParameter {
        name: "file",
        param_type: ql::Type::At("file"),
    };

    let database_is_overlay = ql::Expression::Pred("isOverlay", Vec::new());
    let file_of_any_node = ql::Expression::Pred("getNodeFile", vec![ql::Expression::Var("_")]);
    let file_contains_node = ql::Expression::Equals(
        Box::new(ql::Expression::Var("file")),
        Box::new(file_of_any_node),
    );

    ql::Predicate {
        qldoc: Some("Holds if `file` was extracted as part of the overlay database.".to_owned()),
        name: "discardFile",
        overridden: false,
        is_private: true,
        is_final: false,
        return_type: None,
        formal_parameters: vec![file_param],
        body: ql::Expression::And(vec![database_is_overlay, file_contains_node]),
        overlay: Some(ql::OverlayAnnotation::Local),
    }
}
/// Builds the private, `overlay[local]` QL predicate
/// `discardableAstNode(file, node)`.
///
/// * `ast_node_name` - database type (without the leading `@`) of the `node`
///   parameter.
///
/// The generated body is the conjunction of the negation of `isOverlay()` and
/// `file = getNodeFile(node)`.
pub fn create_discardable_ast_node_predicate(ast_node_name: &str) -> ql::Predicate {
    let file_param = ql::FormalParameter {
        name: "file",
        param_type: ql::Type::At("file"),
    };
    let node_param = ql::FormalParameter {
        name: "node",
        param_type: ql::Type::At(ast_node_name),
    };

    let database_is_base =
        ql::Expression::Negation(Box::new(ql::Expression::Pred("isOverlay", Vec::new())));
    let file_of_node = ql::Expression::Pred("getNodeFile", vec![ql::Expression::Var("node")]);
    let node_is_in_file = ql::Expression::Equals(
        Box::new(ql::Expression::Var("file")),
        Box::new(file_of_node),
    );

    ql::Predicate {
        qldoc: Some(
            "Holds if `node` is in the `file` and is part of the overlay base database."
                .to_owned(),
        ),
        name: "discardableAstNode",
        overridden: false,
        is_private: true,
        is_final: false,
        return_type: None,
        formal_parameters: vec![file_param, node_param],
        body: ql::Expression::And(vec![database_is_base, node_is_in_file]),
        overlay: Some(ql::OverlayAnnotation::Local),
    }
}
/// Builds the private QL predicate `discardAstNode(node)`, annotated with
/// `overlay[discard_entity]`.
///
/// * `ast_node_name` - database type (without the leading `@`) of the `node`
///   parameter.
///
/// The generated body is
/// `exists(@file file | discardableAstNode(file, node) and discardFile(file))`.
pub fn create_discard_ast_node_predicate(ast_node_name: &str) -> ql::Predicate {
ql::Predicate {
name: "discardAstNode",
qldoc: Some(String::from(
"Holds if `node` should be discarded, because it is part of the overlay base \
and is in a file that was also extracted as part of the overlay database.",
)),
overridden: false,
is_private: true,
is_final: false,
// The only generated predicate here that uses `DiscardEntity` rather than `Local`.
overlay: Some(ql::OverlayAnnotation::DiscardEntity),
return_type: None,
formal_parameters: vec![ql::FormalParameter {
name: "node",
param_type: ql::Type::At(ast_node_name),
}],
body: ql::Expression::Aggregate {
name: "exists",
vars: vec![ql::FormalParameter {
name: "file",
param_type: ql::Type::At("file"),
}],
// No range part: both conditions go into the formula of the `exists`.
range: None,
expr: Box::new(ql::Expression::And(vec![
ql::Expression::Pred(
"discardableAstNode",
vec![ql::Expression::Var("file"), ql::Expression::Var("node")],
),
ql::Expression::Pred("discardFile", vec![ql::Expression::Var("file")]),
])),
second_expr: None,
},
} }
} }
@@ -435,10 +614,12 @@ fn create_field_getters<'a>(
qldoc: Some(qldoc), qldoc: Some(qldoc),
name: &field.getter_name, name: &field.getter_name,
overridden: false, overridden: false,
is_private: false,
is_final: true, is_final: true,
return_type, return_type,
formal_parameters, formal_parameters,
body, body,
overlay: None,
}, },
optional_expr, optional_expr,
) )
@@ -548,10 +729,12 @@ pub fn convert_nodes(nodes: &node_types::NodeTypeMap) -> Vec<ql::TopLevel> {
qldoc: Some(String::from("Gets a field or child node of this node.")), qldoc: Some(String::from("Gets a field or child node of this node.")),
name: "getAFieldOrChild", name: "getAFieldOrChild",
overridden: true, overridden: true,
is_private: false,
is_final: true, is_final: true,
return_type: Some(ql::Type::Normal("AstNode")), return_type: Some(ql::Type::Normal("AstNode")),
formal_parameters: vec![], formal_parameters: vec![],
body: ql::Expression::Or(get_child_exprs), body: ql::Expression::Or(get_child_exprs),
overlay: None,
}); });
classes.push(ql::TopLevel::Class(main_class)); classes.push(ql::TopLevel::Class(main_class));