mirror of
https://github.com/github/codeql.git
synced 2026-06-17 10:51:09 +02:00
Merge pull request #21981 from asgerf/yeast/comments
Yeast/Unified: Extract comments
This commit is contained in:
@@ -333,6 +333,9 @@ pub fn extract(
|
||||
.run_from_tree(&tree, source)
|
||||
.unwrap_or_else(|e| panic!("Desugaring failed for {path_str}: {e}"));
|
||||
traverse_yeast(&ast, &mut visitor);
|
||||
// Comments and other `extra` nodes are not represented in the desugared
|
||||
// AST, so recover them directly from the original parse tree.
|
||||
traverse_extras(&tree, &mut visitor);
|
||||
} else {
|
||||
traverse(&tree, &mut visitor);
|
||||
}
|
||||
@@ -365,6 +368,8 @@ struct Visitor<'a> {
|
||||
ast_node_parent_table_name: String,
|
||||
/// Language-specific name of the tokeninfo table
|
||||
tokeninfo_table_name: String,
|
||||
/// Language-specific name of the trivia tokeninfo table
|
||||
trivia_tokeninfo_table_name: String,
|
||||
/// A lookup table from type name to node types
|
||||
schema: &'a NodeTypeMap,
|
||||
/// A stack for gathering information from child nodes. Whenever a node is
|
||||
@@ -395,11 +400,33 @@ impl<'a> Visitor<'a> {
|
||||
ast_node_location_table_name: format!("{language_prefix}_ast_node_location"),
|
||||
ast_node_parent_table_name: format!("{language_prefix}_ast_node_parent"),
|
||||
tokeninfo_table_name: format!("{language_prefix}_tokeninfo"),
|
||||
trivia_tokeninfo_table_name: format!("{language_prefix}_trivia_tokeninfo"),
|
||||
schema,
|
||||
stack: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits a `TriviaToken` for the given `extra` node (e.g. a comment) from
|
||||
/// the original parse tree. Trivia tokens carry a location and their source
|
||||
/// text, but are not attached to a parent in the (possibly desugared) AST.
|
||||
fn emit_trivia_token(&mut self, node: &Node) {
|
||||
let id = self.trap_writer.fresh_id();
|
||||
let loc = location_for(self, self.file_label, node);
|
||||
let loc_label = location_label(self.trap_writer, loc);
|
||||
self.trap_writer.add_tuple(
|
||||
&self.ast_node_location_table_name,
|
||||
vec![trap::Arg::Label(id), trap::Arg::Label(loc_label)],
|
||||
);
|
||||
self.trap_writer.add_tuple(
|
||||
&self.trivia_tokeninfo_table_name,
|
||||
vec![
|
||||
trap::Arg::Label(id),
|
||||
trap::Arg::Int(node.kind_id() as usize),
|
||||
sliced_source_arg(self.source, node),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
fn record_parse_error(&mut self, loc: trap::Label, mesg: &diagnostics::DiagnosticMessage) {
|
||||
self.diagnostics_writer.write(mesg);
|
||||
let id = self.trap_writer.fresh_id();
|
||||
@@ -835,6 +862,24 @@ fn traverse(tree: &Tree, visitor: &mut Visitor) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Walks the original tree-sitter tree and emits a `TriviaToken` for every
|
||||
/// `extra` node (e.g. a comment). Used to preserve comments that would
|
||||
/// otherwise be lost after a desugaring pass rewrites the tree.
|
||||
fn traverse_extras(tree: &Tree, visitor: &mut Visitor) {
|
||||
emit_extras_in(visitor, tree.root_node());
|
||||
}
|
||||
|
||||
/// Emits a `TriviaToken` for every `extra` node strictly below `node`, in
/// document (pre-order) order.
///
/// `node` itself is never emitted. An `extra` node is emitted and not
/// descended into; any other node is only descended into.
///
/// The walk is iterative (a single `TreeCursor` plus a depth counter) rather
/// than recursive, so very deeply nested parse trees cannot overflow the
/// native stack.
fn emit_extras_in(visitor: &mut Visitor, node: Node<'_>) {
    let mut cursor = node.walk();
    if !cursor.goto_first_child() {
        // `node` has no children, so there is nothing to report.
        return;
    }
    // Depth of the cursor relative to `node`; its direct children are at 1.
    let mut depth: usize = 1;
    loop {
        let current = cursor.node();
        if current.is_extra() {
            visitor.emit_trivia_token(&current);
        } else if cursor.goto_first_child() {
            // Visit the subtree of a non-extra node before its siblings.
            depth += 1;
            continue;
        }
        // Advance to the next node in document order, climbing back towards
        // `node` whenever the current level has no further siblings.
        while !cursor.goto_next_sibling() {
            if depth == 1 {
                // The last direct child of `node` has been handled.
                return;
            }
            cursor.goto_parent();
            depth -= 1;
        }
    }
}
|
||||
|
||||
fn traverse_yeast(tree: &yeast::Ast, visitor: &mut Visitor) {
|
||||
use yeast::Cursor;
|
||||
let mut cursor = tree.walk();
|
||||
|
||||
@@ -68,7 +68,12 @@ pub fn generate(
|
||||
let node_parent_table_name = format!("{}_ast_node_parent", &prefix);
|
||||
let token_name = format!("{}_token", &prefix);
|
||||
let tokeninfo_name = format!("{}_tokeninfo", &prefix);
|
||||
let trivia_token_name = format!("{}_trivia_token", &prefix);
|
||||
let trivia_tokeninfo_name = format!("{}_trivia_tokeninfo", &prefix);
|
||||
let reserved_word_name = format!("{}_reserved_word", &prefix);
|
||||
// When a desugaring is configured, comments and other `extra` nodes are
|
||||
// preserved from the original parse tree as `TriviaToken`s.
|
||||
let has_trivia_tokens = language.desugar.is_some();
|
||||
let effective_node_types: String = match language
|
||||
.desugar
|
||||
.as_ref()
|
||||
@@ -85,28 +90,35 @@ pub fn generate(
|
||||
let nodes = node_types::read_node_types_str(&prefix, &effective_node_types)?;
|
||||
let (dbscheme_entries, mut ast_node_members, token_kinds) = convert_nodes(&nodes);
|
||||
ast_node_members.insert(&token_name);
|
||||
if has_trivia_tokens {
|
||||
ast_node_members.insert(&trivia_token_name);
|
||||
}
|
||||
writeln!(&mut dbscheme_writer, "/*- {} dbscheme -*/", language.name)?;
|
||||
dbscheme::write(&mut dbscheme_writer, &dbscheme_entries)?;
|
||||
let token_case = create_token_case(&token_name, token_kinds);
|
||||
dbscheme::write(
|
||||
&mut dbscheme_writer,
|
||||
&[
|
||||
dbscheme::Entry::Table(create_tokeninfo(&tokeninfo_name, &token_name)),
|
||||
dbscheme::Entry::Case(token_case),
|
||||
dbscheme::Entry::Union(dbscheme::Union {
|
||||
name: &ast_node_name,
|
||||
members: ast_node_members,
|
||||
}),
|
||||
dbscheme::Entry::Table(create_ast_node_location_table(
|
||||
&node_location_table_name,
|
||||
&ast_node_name,
|
||||
)),
|
||||
dbscheme::Entry::Table(create_ast_node_parent_table(
|
||||
&node_parent_table_name,
|
||||
&ast_node_name,
|
||||
)),
|
||||
],
|
||||
)?;
|
||||
let mut dbscheme_tail = vec![
|
||||
dbscheme::Entry::Table(create_tokeninfo(&tokeninfo_name, &token_name)),
|
||||
dbscheme::Entry::Case(token_case),
|
||||
];
|
||||
if has_trivia_tokens {
|
||||
dbscheme_tail.push(dbscheme::Entry::Table(create_tokeninfo(
|
||||
&trivia_tokeninfo_name,
|
||||
&trivia_token_name,
|
||||
)));
|
||||
}
|
||||
dbscheme_tail.push(dbscheme::Entry::Union(dbscheme::Union {
|
||||
name: &ast_node_name,
|
||||
members: ast_node_members,
|
||||
}));
|
||||
dbscheme_tail.push(dbscheme::Entry::Table(create_ast_node_location_table(
|
||||
&node_location_table_name,
|
||||
&ast_node_name,
|
||||
)));
|
||||
dbscheme_tail.push(dbscheme::Entry::Table(create_ast_node_parent_table(
|
||||
&node_parent_table_name,
|
||||
&ast_node_name,
|
||||
)));
|
||||
dbscheme::write(&mut dbscheme_writer, &dbscheme_tail)?;
|
||||
|
||||
let mut body = vec![
|
||||
ql::TopLevel::Class(ql_gen::create_ast_node_class(
|
||||
@@ -116,6 +128,12 @@ pub fn generate(
|
||||
)),
|
||||
ql::TopLevel::Class(ql_gen::create_token_class(&token_name, &tokeninfo_name)),
|
||||
];
|
||||
if has_trivia_tokens {
|
||||
body.push(ql::TopLevel::Class(ql_gen::create_trivia_token_class(
|
||||
&trivia_token_name,
|
||||
&trivia_tokeninfo_name,
|
||||
)));
|
||||
}
|
||||
// Only emit the ReservedWord class when there are actually unnamed token
|
||||
// types in the schema (i.e., @{prefix}_reserved_word exists in the dbscheme).
|
||||
// When converting from a YEAST YAML schema that has no unnamed tokens, this
|
||||
|
||||
@@ -199,6 +199,70 @@ pub fn create_token_class<'a>(token_type: &'a str, tokeninfo: &'a str) -> ql::Cl
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates the `TriviaToken` class. Trivia tokens (e.g. comments) are
|
||||
/// `extra` nodes preserved from the original parse tree even when the tree has
|
||||
/// been rewritten by a desugaring pass. They are not part of the regular
|
||||
/// `Token` hierarchy because they do not appear in the (possibly desugared)
|
||||
/// output schema.
|
||||
pub fn create_trivia_token_class<'a>(
|
||||
trivia_token_type: &'a str,
|
||||
trivia_tokeninfo: &'a str,
|
||||
) -> ql::Class<'a> {
|
||||
let trivia_tokeninfo_arity = 3; // id, kind, value
|
||||
let get_value = ql::Predicate {
|
||||
qldoc: Some(String::from("Gets the source text of this trivia token.")),
|
||||
name: "getValue",
|
||||
overridden: false,
|
||||
is_private: false,
|
||||
is_final: true,
|
||||
return_type: Some(ql::Type::String),
|
||||
formal_parameters: vec![],
|
||||
body: create_get_field_expr_for_column_storage(
|
||||
"result",
|
||||
trivia_tokeninfo,
|
||||
1,
|
||||
trivia_tokeninfo_arity,
|
||||
),
|
||||
overlay: None,
|
||||
};
|
||||
let to_string = ql::Predicate {
|
||||
qldoc: Some(String::from(
|
||||
"Gets a string representation of this element.",
|
||||
)),
|
||||
name: "toString",
|
||||
overridden: true,
|
||||
is_private: false,
|
||||
is_final: true,
|
||||
return_type: Some(ql::Type::String),
|
||||
formal_parameters: vec![],
|
||||
body: ql::Expression::Equals(
|
||||
Box::new(ql::Expression::Var("result")),
|
||||
Box::new(ql::Expression::Dot(
|
||||
Box::new(ql::Expression::Var("this")),
|
||||
"getValue",
|
||||
vec![],
|
||||
)),
|
||||
),
|
||||
overlay: None,
|
||||
};
|
||||
ql::Class {
|
||||
qldoc: Some(String::from(
|
||||
"A trivia token, such as a comment, preserved from the original parse tree.",
|
||||
)),
|
||||
name: "TriviaToken",
|
||||
is_abstract: false,
|
||||
supertypes: vec![ql::Type::At(trivia_token_type), ql::Type::Normal("AstNode")]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
characteristic_predicate: None,
|
||||
predicates: vec![
|
||||
get_value,
|
||||
to_string,
|
||||
create_get_a_primary_ql_class("TriviaToken", false),
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
// Creates the `ReservedWord` class.
|
||||
pub fn create_reserved_word_class(db_name: &str) -> ql::Class<'_> {
|
||||
let class_name = "ReservedWord";
|
||||
|
||||
@@ -61,6 +61,18 @@ module Unified {
|
||||
override string getAPrimaryQlClass() { result = "Token" }
|
||||
}
|
||||
|
||||
/** A trivia token, such as a comment, preserved from the original parse tree. */
|
||||
class TriviaToken extends @unified_trivia_token, AstNode {
|
||||
/** Gets the source text of this trivia token. */
|
||||
final string getValue() { unified_trivia_tokeninfo(this, _, result) }
|
||||
|
||||
/** Gets a string representation of this element. */
|
||||
final override string toString() { result = this.getValue() }
|
||||
|
||||
/** Gets the name of the primary QL class for this element. */
|
||||
override string getAPrimaryQlClass() { result = "TriviaToken" }
|
||||
}
|
||||
|
||||
/** Gets the file containing the given `node`. */
|
||||
private @file getNodeFile(@unified_ast_node node) {
|
||||
exists(@location_default loc | unified_ast_node_location(node, loc) |
|
||||
|
||||
18
unified/ql/lib/codeql/unified/Comments.qll
Normal file
18
unified/ql/lib/codeql/unified/Comments.qll
Normal file
@@ -0,0 +1,18 @@
|
||||
/** Provides classes for working with comments. */
|
||||
|
||||
private import unified
|
||||
|
||||
/**
|
||||
* A comment appearing in the source code.
|
||||
*/
|
||||
class Comment extends TriviaToken {
|
||||
// At the moment, comments are the only type of trivia token we extract
|
||||
/**
|
||||
* Gets the text inside this comment, not counting the delimiters.
|
||||
*/
|
||||
string getCommentText() {
|
||||
result = this.getValue().regexpCapture("//(.*)", 1)
|
||||
or
|
||||
result = this.getValue().regexpCapture("(?s)/\\*(.*)\\*/", 1)
|
||||
}
|
||||
}
|
||||
@@ -334,7 +334,13 @@ case @unified_token.kind of
|
||||
;
|
||||
|
||||
|
||||
@unified_ast_node = @unified_apply_pattern | @unified_binary_expr | @unified_block_stmt | @unified_call_expr | @unified_expr_condition | @unified_expr_stmt | @unified_guard_if_stmt | @unified_if_stmt | @unified_lambda_expr | @unified_let_pattern_condition | @unified_member_access_expr | @unified_name_expr | @unified_parameter | @unified_sequence_condition | @unified_token | @unified_top_level | @unified_tuple_pattern | @unified_unary_expr | @unified_var_pattern | @unified_variable_declaration_stmt | @unified_variable_declarator
|
||||
unified_trivia_tokeninfo(
|
||||
unique int id: @unified_trivia_token,
|
||||
int kind: int ref,
|
||||
string value: string ref
|
||||
);
|
||||
|
||||
@unified_ast_node = @unified_apply_pattern | @unified_binary_expr | @unified_block_stmt | @unified_call_expr | @unified_expr_condition | @unified_expr_stmt | @unified_guard_if_stmt | @unified_if_stmt | @unified_lambda_expr | @unified_let_pattern_condition | @unified_member_access_expr | @unified_name_expr | @unified_parameter | @unified_sequence_condition | @unified_token | @unified_top_level | @unified_trivia_token | @unified_tuple_pattern | @unified_unary_expr | @unified_var_pattern | @unified_variable_declaration_stmt | @unified_variable_declarator
|
||||
|
||||
unified_ast_node_location(
|
||||
unique int node: @unified_ast_node ref,
|
||||
|
||||
8
unified/ql/lib/unified.qll
Normal file
8
unified/ql/lib/unified.qll
Normal file
@@ -0,0 +1,8 @@
|
||||
/**
|
||||
* Provides classes for working with the AST, as well as files and locations.
|
||||
*/
|
||||
|
||||
import codeql.Locations
|
||||
import codeql.files.FileSystem
|
||||
import codeql.unified.Ast::Unified
|
||||
import codeql.unified.Comments
|
||||
3
unified/ql/test/library-tests/comments/comments.expected
Normal file
3
unified/ql/test/library-tests/comments/comments.expected
Normal file
@@ -0,0 +1,3 @@
|
||||
| comments.swift:1:1:1:22 | // Hello this is swift | Hello this is swift |
|
||||
| comments.swift:3:1:6:3 | /*\n * This is a multi-line comment\n * It should be ignored by the parser\n */ | \n * This is a multi-line comment\n * It should be ignored by the parser\n |
|
||||
| comments.swift:9:5:9:36 | // This is a single-line comment | This is a single-line comment |
|
||||
3
unified/ql/test/library-tests/comments/comments.ql
Normal file
3
unified/ql/test/library-tests/comments/comments.ql
Normal file
@@ -0,0 +1,3 @@
|
||||
import unified
|
||||
|
||||
query predicate comments(Comment c, string text) { text = c.getCommentText() }
|
||||
11
unified/ql/test/library-tests/comments/comments.swift
Normal file
11
unified/ql/test/library-tests/comments/comments.swift
Normal file
@@ -0,0 +1,11 @@
|
||||
// Hello this is swift
|
||||
|
||||
/*
|
||||
* This is a multi-line comment
|
||||
* It should be ignored by the parser
|
||||
*/
|
||||
|
||||
func hello() {
|
||||
// This is a single-line comment
|
||||
print("Hello, world!")
|
||||
}
|
||||
Reference in New Issue
Block a user