Merge pull request #12269 from github/tausbn/ql-add-json-extraction

QL: Add JSON (plus JSONC and JSONL) extraction
This commit is contained in:
Mathias Vorreiter Pedersen
2023-02-21 16:42:19 +00:00
committed by GitHub
8 changed files with 275 additions and 0 deletions

BIN
ql/Cargo.lock generated

Binary file not shown.

View File

@@ -18,6 +18,9 @@ fn main() -> std::io::Result<()> {
.arg("--include-extension=.ql")
.arg("--include-extension=.qll")
.arg("--include-extension=.dbscheme")
.arg("--include-extension=.json")
.arg("--include-extension=.jsonc")
.arg("--include-extension=.jsonl")
.arg("--include=**/qlpack.yml")
.arg("--include=deprecated.blame")
.arg("--size-limit=5m")

View File

@@ -14,6 +14,7 @@ tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", re
tree-sitter-ql-dbscheme = { git = "https://github.com/erik-krogh/tree-sitter-ql-dbscheme.git", rev = "63e1344353f63931e88bfbc2faa2e78e1421b213"}
tree-sitter-ql-yaml = {git = "https://github.com/erik-krogh/tree-sitter-ql.git", rev = "cf704bf3671e1ae148e173464fb65a4d2bbf5f99"}
tree-sitter-blame = {path = "../buramu/tree-sitter-blame"}
tree-sitter-json = {git = "https://github.com/tausbn/tree-sitter-json.git", rev = "471ceac44d127e609afa349cf0a59370791fe8b3"}
clap = "2.33"
tracing = "0.1"
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }

View File

@@ -88,11 +88,13 @@ fn main() -> std::io::Result<()> {
let dbscheme = tree_sitter_ql_dbscheme::language();
let yaml = tree_sitter_ql_yaml::language();
let blame = tree_sitter_blame::language();
let json = tree_sitter_json::language();
let schema = node_types::read_node_types_str("ql", tree_sitter_ql::NODE_TYPES)?;
let dbscheme_schema =
node_types::read_node_types_str("dbscheme", tree_sitter_ql_dbscheme::NODE_TYPES)?;
let yaml_schema = node_types::read_node_types_str("yaml", tree_sitter_ql_yaml::NODE_TYPES)?;
let blame_schema = node_types::read_node_types_str("blame", tree_sitter_blame::NODE_TYPES)?;
let json_schema = node_types::read_node_types_str("json", tree_sitter_json::NODE_TYPES)?;
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
let lines = lines?;
@@ -134,6 +136,19 @@ fn main() -> std::io::Result<()> {
&source,
&code_ranges,
)?
} else if line.ends_with(".json")
|| line.ends_with(".jsonl")
|| line.ends_with(".jsonc")
{
extractor::extract(
json,
"json",
&json_schema,
&mut trap_writer,
&path,
&source,
&code_ranges,
)?
} else if line.ends_with(".blame") {
extractor::extract(
blame,

View File

@@ -15,3 +15,4 @@ tree-sitter-ql = { git = "https://github.com/tree-sitter/tree-sitter-ql.git", re
tree-sitter-ql-dbscheme = { git = "https://github.com/erik-krogh/tree-sitter-ql-dbscheme.git", rev = "63e1344353f63931e88bfbc2faa2e78e1421b213"}
tree-sitter-ql-yaml = {git = "https://github.com/erik-krogh/tree-sitter-ql.git", rev = "cf704bf3671e1ae148e173464fb65a4d2bbf5f99"}
tree-sitter-blame = {path = "../buramu/tree-sitter-blame"}
tree-sitter-json = { git = "https://github.com/tausbn/tree-sitter-json.git", rev = "471ceac44d127e609afa349cf0a59370791fe8b3"}

View File

@@ -581,6 +581,10 @@ fn main() -> std::io::Result<()> {
name: "Blame".to_owned(),
node_types: tree_sitter_blame::NODE_TYPES,
},
Language {
name: "JSON".to_owned(),
node_types: tree_sitter_json::NODE_TYPES,
},
];
let mut dbscheme_writer = LineWriter::new(File::create(dbscheme_path)?);
write!(

View File

@@ -1856,3 +1856,162 @@ module Blame {
final override string getAPrimaryQlClass() { result = "Number" }
}
}
/**
 * Classes for JSON syntax-tree nodes. Each class wraps one of the `@json_*`
 * database types and reads its facts from the `json_*` relations (for example
 * `json_ast_node_info` and `json_tokeninfo`).
 */
module JSON {
/** The base class for all AST nodes */
class AstNode extends @json_ast_node {
/** Gets a string representation of this element. By default this is the primary QL class name. */
string toString() { result = this.getAPrimaryQlClass() }
/** Gets the location of this element. */
final L::Location getLocation() { json_ast_node_info(this, _, _, result) }
/**
 * Gets the parent of this element.
 * The result is typed as `AstNode`, so a parent that is not itself a JSON AST node
 * yields no result.
 */
final AstNode getParent() { json_ast_node_info(this, result, _, _) }
/** Gets the index of this node among the children of its parent. */
final int getParentIndex() { json_ast_node_info(this, _, result, _) }
/**
 * Gets a field or child node of this node.
 * The base implementation has no results; node classes with children override it.
 */
AstNode getAFieldOrChild() { none() }
/**
 * Gets the name of the primary QL class for this element.
 * The base implementation returns the placeholder "???"; subclasses override it.
 */
string getAPrimaryQlClass() { result = "???" }
/** Gets a comma-separated list of the names of the primary CodeQL classes to which this element belongs. */
string getPrimaryQlClasses() { result = concat(this.getAPrimaryQlClass(), ",") }
}
/** A token. */
class Token extends @json_token, AstNode {
/** Gets the value of this token, as recorded in `json_tokeninfo`. */
final string getValue() { json_tokeninfo(this, _, result) }
/** Gets a string representation of this element, which for tokens is the token's value. */
final override string toString() { result = this.getValue() }
/** Gets the name of the primary QL class for this element. */
override string getAPrimaryQlClass() { result = "Token" }
}
/** A reserved word. */
class ReservedWord extends @json_reserved_word, Token {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "ReservedWord" }
}
/** A class representing `array` nodes. */
class Array extends @json_array, AstNode {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Array" }
/** Gets the `i`th child of this node. */
final Value getChild(int i) { json_array_child(this, i, result) }
/** Gets a field or child node of this node, that is, any child at any index. */
final override AstNode getAFieldOrChild() { json_array_child(this, _, result) }
}
/** A class representing `comment` tokens. */
class Comment extends @json_token_comment, Token {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Comment" }
}
/** A class representing `document` nodes. */
class Document extends @json_document, AstNode {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Document" }
/** Gets the `i`th child of this node. */
final Value getChild(int i) { json_document_child(this, i, result) }
/** Gets a field or child node of this node, that is, any child at any index. */
final override AstNode getAFieldOrChild() { json_document_child(this, _, result) }
}
/** A class representing `escape_sequence` tokens. */
class EscapeSequence extends @json_token_escape_sequence, Token {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "EscapeSequence" }
}
/** A class representing `false` tokens. */
class False extends @json_token_false, Token {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "False" }
}
/** A class representing `null` tokens. */
class Null extends @json_token_null, Token {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Null" }
}
/** A class representing `number` tokens. */
class Number extends @json_token_number, Token {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Number" }
}
/** A class representing `object` nodes. */
class Object extends @json_object, AstNode {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Object" }
/** Gets the `i`th child of this node; every child of an object is a `Pair`. */
final Pair getChild(int i) { json_object_child(this, i, result) }
/** Gets a field or child node of this node, that is, any child at any index. */
final override AstNode getAFieldOrChild() { json_object_child(this, _, result) }
}
/** A class representing `pair` nodes. */
class Pair extends @json_pair, AstNode {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "Pair" }
/** Gets the node corresponding to the field `key`. The result is typed only as `AstNode`. */
final AstNode getKey() { json_pair_def(this, result, _) }
/** Gets the node corresponding to the field `value`. */
final Value getValue() { json_pair_def(this, _, result) }
/** Gets a field or child node of this node: either the key or the value. */
final override AstNode getAFieldOrChild() {
json_pair_def(this, result, _) or json_pair_def(this, _, result)
}
}
/** A class representing `string` nodes. The underlying database type is named `@json_string__`. */
class String extends @json_string__, AstNode {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "String" }
/** Gets the child of this node, if any; it is always a `StringContent`. */
final StringContent getChild() { json_string_child(this, result) }
/** Gets a field or child node of this node. */
final override AstNode getAFieldOrChild() { json_string_child(this, result) }
}
/** A class representing `string_content` nodes. */
class StringContent extends @json_string_content, AstNode {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "StringContent" }
/** Gets the `i`th child of this node; every child is an `EscapeSequence` token. */
final EscapeSequence getChild(int i) { json_string_content_child(this, i, result) }
/** Gets a field or child node of this node, that is, any child at any index. */
final override AstNode getAFieldOrChild() { json_string_content_child(this, _, result) }
}
/** A class representing `true` tokens. */
class True extends @json_token_true, Token {
/** Gets the name of the primary QL class for this element. */
final override string getAPrimaryQlClass() { result = "True" }
}
/**
 * A class covering every node of the `@json_value` database type.
 * It adds no members of its own and is the result type for array and document
 * children and for pair values.
 */
class Value extends @json_value, AstNode { }
}

View File

@@ -1249,3 +1249,95 @@ blame_ast_node_info(
int loc: @location ref
);
// Children of `array` nodes: one row per (array, index) position; each child is a JSON value.
#keyset[json_array, index]
json_array_child(
int json_array: @json_array ref,
int index: int ref,
unique int child: @json_value ref
);
// Declares the `array` node entities.
json_array_def(
unique int id: @json_array
);
// Children of `document` nodes: one row per (document, index) position; each child is a JSON value.
#keyset[json_document, index]
json_document_child(
int json_document: @json_document ref,
int index: int ref,
unique int child: @json_value ref
);
// Declares the `document` node entities.
json_document_def(
unique int id: @json_document
);
// Children of `object` nodes: one row per (object, index) position; each child is a `pair`.
#keyset[json_object, index]
json_object_child(
int json_object: @json_object ref,
int index: int ref,
unique int child: @json_pair ref
);
// Declares the `object` node entities.
json_object_def(
unique int id: @json_object
);
// Node types permitted as the key of a `pair`: a string node or a number token.
@json_pair_key_type = @json_string__ | @json_token_number
// Declares the `pair` node entities, recording each pair's key and value.
json_pair_def(
unique int id: @json_pair,
int key__: @json_pair_key_type ref,
int value: @json_value ref
);
// Child of a `string` node. Both columns are unique, so a string has at most one
// `string_content` child and each `string_content` belongs to at most one string.
json_string_child(
unique int json_string__: @json_string__ ref,
unique int child: @json_string_content ref
);
// Declares the `string` node entities.
json_string_def(
unique int id: @json_string__
);
// Children of `string_content` nodes: one row per (string_content, index) position;
// each child is an `escape_sequence` token.
#keyset[json_string_content, index]
json_string_content_child(
int json_string_content: @json_string_content ref,
int index: int ref,
unique int child: @json_token_escape_sequence ref
);
// Declares the `string_content` node entities.
json_string_content_def(
unique int id: @json_string_content
);
// Union of every node type that can appear where a JSON value is expected.
@json_value = @json_array | @json_object | @json_string__ | @json_token_false | @json_token_null | @json_token_number | @json_token_true
// Declares the token entities: `kind` selects the token type (see the `case` below)
// and `value` holds the token's string value.
json_tokeninfo(
unique int id: @json_token,
int kind: int ref,
string value: string ref
);
// Maps each `kind` value stored in `json_tokeninfo` to its token type.
case @json_token.kind of
0 = @json_reserved_word
| 1 = @json_token_comment
| 2 = @json_token_escape_sequence
| 3 = @json_token_false
| 4 = @json_token_null
| 5 = @json_token_number
| 6 = @json_token_true
;
// Union of all JSON AST node types (non-token nodes plus tokens).
@json_ast_node = @json_array | @json_document | @json_object | @json_pair | @json_string__ | @json_string_content | @json_token
// A node's parent is either a file or another JSON AST node.
@json_ast_node_parent = @file | @json_ast_node
// Per-node tree information: parent, index within that parent, and source location.
// There is at most one row per node, and at most one node per (parent, parent_index) slot.
#keyset[parent, parent_index]
json_ast_node_info(
unique int node: @json_ast_node ref,
int parent: @json_ast_node_parent ref,
int parent_index: int ref,
int loc: @location ref
);