mirror of
https://github.com/github/codeql.git
synced 2026-06-25 14:47:04 +02:00
Yeast: use child locations instead of rule target
Previously, when a node was synthesized it would always take the
location from the node that matched the current rule. This resulted
in overly broad locations however.
For (foo #{bar}) we now take the location of the 'bar' node.
For non-leaf nodes we merge all its child node locations.
This commit is contained in:
@@ -396,8 +396,10 @@ fn parse_direct_node_inner(tokens: &mut Tokens, ctx: &Ident) -> Result<TokenStre
|
||||
let expr = group.stream();
|
||||
return Ok(quote! {
|
||||
{
|
||||
let __value = yeast::YeastDisplay::yeast_to_string(&(#expr), &*#ctx.ast);
|
||||
#ctx.literal(#kind_str, &__value)
|
||||
let __expr = (#expr);
|
||||
let __value = yeast::YeastDisplay::yeast_to_string(&__expr, &*#ctx.ast);
|
||||
let __source_range = yeast::YeastSourceRange::yeast_source_range(&__expr, &*#ctx.ast);
|
||||
#ctx.literal_with_source_range(#kind_str, &__value, __source_range)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -82,6 +82,21 @@ impl<'a> BuildCtx<'a> {
|
||||
.create_named_token_with_range(kind, value.to_string(), self.source_range)
|
||||
}
|
||||
|
||||
/// Create a leaf node with fixed content and an optional preferred source range.
|
||||
/// If `source_range` is `None`, falls back to this context's inherited range.
|
||||
pub fn literal_with_source_range(
|
||||
&mut self,
|
||||
kind: &'static str,
|
||||
value: &str,
|
||||
source_range: Option<tree_sitter::Range>,
|
||||
) -> Id {
|
||||
self.ast.create_named_token_with_range(
|
||||
kind,
|
||||
value.to_string(),
|
||||
source_range.or(self.source_range),
|
||||
)
|
||||
}
|
||||
|
||||
/// Create a leaf node with an auto-generated unique name.
|
||||
pub fn fresh(&mut self, kind: &'static str, name: &str) -> Id {
|
||||
let generated = self.fresh.resolve(name);
|
||||
|
||||
@@ -58,12 +58,30 @@ pub trait YeastDisplay {
|
||||
fn yeast_to_string(&self, ast: &Ast) -> String;
|
||||
}
|
||||
|
||||
/// Optional source range for values used in `#{expr}` interpolations.
|
||||
///
|
||||
/// By default this returns `None`, so synthesized leaves inherit the matched
|
||||
/// rule's source range. `NodeRef` returns the referenced node's range, letting
|
||||
/// `(kind #{capture})` carry the captured node's location.
|
||||
pub trait YeastSourceRange {
|
||||
fn yeast_source_range(&self, ast: &Ast) -> Option<tree_sitter::Range>;
|
||||
}
|
||||
|
||||
impl YeastDisplay for NodeRef {
|
||||
fn yeast_to_string(&self, ast: &Ast) -> String {
|
||||
ast.source_text(self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl YeastSourceRange for NodeRef {
|
||||
fn yeast_source_range(&self, ast: &Ast) -> Option<tree_sitter::Range> {
|
||||
ast.get_node(self.0).and_then(|n| match &n.content {
|
||||
NodeContent::Range(r) => Some(r.clone()),
|
||||
_ => n.source_range,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_yeast_display_via_display {
|
||||
($($t:ty),* $(,)?) => {
|
||||
$(
|
||||
@@ -72,6 +90,12 @@ macro_rules! impl_yeast_display_via_display {
|
||||
::std::string::ToString::to_string(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl YeastSourceRange for $t {
|
||||
fn yeast_source_range(&self, _ast: &Ast) -> Option<tree_sitter::Range> {
|
||||
None
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
@@ -90,6 +114,12 @@ impl<T: YeastDisplay + ?Sized> YeastDisplay for &T {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: YeastSourceRange + ?Sized> YeastSourceRange for &T {
|
||||
fn yeast_source_range(&self, ast: &Ast) -> Option<tree_sitter::Range> {
|
||||
(**self).yeast_source_range(ast)
|
||||
}
|
||||
}
|
||||
|
||||
pub const CHILD_FIELD: u16 = u16::MAX;
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -368,6 +398,15 @@ impl Ast {
|
||||
is_named: bool,
|
||||
source_range: Option<tree_sitter::Range>,
|
||||
) -> Id {
|
||||
let source_range = match &content {
|
||||
// Parsed nodes already carry an exact source range in their content.
|
||||
NodeContent::Range(_) => source_range,
|
||||
// Synthesized nodes derive location from children when possible,
|
||||
// and fall back to the inherited rule-match range otherwise.
|
||||
_ => self
|
||||
.union_source_range_of_children(&fields)
|
||||
.or(source_range),
|
||||
};
|
||||
let id = self.nodes.len();
|
||||
self.nodes.push(Node {
|
||||
kind,
|
||||
@@ -383,6 +422,66 @@ impl Ast {
|
||||
id
|
||||
}
|
||||
|
||||
fn union_source_range_of_children(
|
||||
&self,
|
||||
fields: &BTreeMap<FieldId, Vec<Id>>,
|
||||
) -> Option<tree_sitter::Range> {
|
||||
let mut start_byte: Option<usize> = None;
|
||||
let mut end_byte: Option<usize> = None;
|
||||
let mut start_point = tree_sitter::Point { row: 0, column: 0 };
|
||||
let mut end_point = tree_sitter::Point { row: 0, column: 0 };
|
||||
|
||||
for child_ids in fields.values() {
|
||||
for &child_id in child_ids {
|
||||
let Some(child) = self.get_node(child_id) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let child_start_byte = child.start_byte();
|
||||
let child_end_byte = child.end_byte();
|
||||
|
||||
// Skip children that carry no usable location.
|
||||
if child_start_byte == 0 && child_end_byte == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
match start_byte {
|
||||
None => {
|
||||
start_byte = Some(child_start_byte);
|
||||
start_point = child.start_position();
|
||||
}
|
||||
Some(current_start) if child_start_byte < current_start => {
|
||||
start_byte = Some(child_start_byte);
|
||||
start_point = child.start_position();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
match end_byte {
|
||||
None => {
|
||||
end_byte = Some(child_end_byte);
|
||||
end_point = child.end_position();
|
||||
}
|
||||
Some(current_end) if child_end_byte > current_end => {
|
||||
end_byte = Some(child_end_byte);
|
||||
end_point = child.end_position();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match (start_byte, end_byte) {
|
||||
(Some(start_byte), Some(end_byte)) => Some(tree_sitter::Range {
|
||||
start_byte,
|
||||
end_byte,
|
||||
start_point,
|
||||
end_point,
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_named_token(&mut self, kind: &'static str, content: String) -> Id {
|
||||
self.create_named_token_with_range(kind, content, None)
|
||||
}
|
||||
|
||||
@@ -18,6 +18,16 @@ fn run_and_dump(input: &str, rules: Vec<Rule>) -> String {
|
||||
run_phased_and_dump(input, vec![Phase::new("test", PhaseKind::Repeating, rules)])
|
||||
}
|
||||
|
||||
/// Helper: parse Ruby source with custom rules and return the transformed AST.
|
||||
fn run_and_ast(input: &str, rules: Vec<Rule>) -> Ast {
|
||||
let lang: tree_sitter::Language = tree_sitter_ruby::LANGUAGE.into();
|
||||
let schema =
|
||||
yeast::node_types_yaml::schema_from_yaml_with_language(OUTPUT_SCHEMA_YAML, &lang).unwrap();
|
||||
let phases = vec![Phase::new("test", PhaseKind::Repeating, rules)];
|
||||
let runner = Runner::with_schema(lang, &schema, &phases);
|
||||
runner.run(input).unwrap()
|
||||
}
|
||||
|
||||
/// Helper: parse Ruby source with a custom output schema and multiple
|
||||
/// rule phases, return dump.
|
||||
fn run_phased_and_dump(input: &str, phases: Vec<Phase>) -> String {
|
||||
@@ -1172,3 +1182,37 @@ fn test_hash_brace_renders_integer_expression() {
|
||||
"#,
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression test: `(kind #{capture})` should inherit the captured node's
|
||||
/// source location, not the full source range of the matched rule root.
|
||||
#[test]
|
||||
fn test_hash_brace_uses_capture_location_for_leaf() {
|
||||
let rule = rule!(
|
||||
(call
|
||||
method: (identifier) @name
|
||||
receiver: (identifier) @recv
|
||||
)
|
||||
=>
|
||||
(call
|
||||
method: (identifier #{name})
|
||||
receiver: (identifier #{recv})
|
||||
arguments: (argument_list)
|
||||
)
|
||||
);
|
||||
|
||||
let ast = run_and_ast("foo.bar()", vec![rule]);
|
||||
|
||||
let mut bar_ids: Vec<usize> = Vec::new();
|
||||
for id in ast.reachable_node_ids() {
|
||||
let Some(node) = ast.get_node(id) else { continue; };
|
||||
if node.kind() == "identifier" && ast.source_text(id) == "bar" {
|
||||
bar_ids.push(id);
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(bar_ids.len(), 1, "expected exactly one identifier 'bar'");
|
||||
let bar = ast.get_node(bar_ids[0]).unwrap();
|
||||
|
||||
assert_eq!(bar.start_byte(), 4);
|
||||
assert_eq!(bar.end_byte(), 7);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user