mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #19929 from github/tausbn/python-update-tree-sitter-dependency
Python: Update `tree-sitter` dependency
This commit is contained in:
@@ -10,7 +10,7 @@ from io import BytesIO
|
||||
|
||||
#Semantic version of extractor.
|
||||
#Update this if any changes are made
|
||||
VERSION = "7.1.3"
|
||||
VERSION = "7.1.4"
|
||||
|
||||
PY_EXTENSIONS = ".py", ".pyw"
|
||||
|
||||
|
||||
@@ -308,47 +308,6 @@ Module: [1, 0] - [23, 0]
|
||||
]
|
||||
ctx: Load
|
||||
ctx: Load
|
||||
TypeAlias: [20, 0] - [20, 41]
|
||||
name:
|
||||
Name: [20, 5] - [20, 9]
|
||||
variable: Variable('Baz2', None)
|
||||
ctx: Store
|
||||
type_parameters: [
|
||||
ParamSpec: [20, 10] - [20, 27]
|
||||
name:
|
||||
Name: [20, 12] - [20, 14]
|
||||
variable: Variable('P2', None)
|
||||
ctx: Store
|
||||
default:
|
||||
List: [20, 17] - [20, 27]
|
||||
elts: [
|
||||
Name: [20, 18] - [20, 21]
|
||||
variable: Variable('int', None)
|
||||
ctx: Load
|
||||
Name: [20, 23] - [20, 26]
|
||||
variable: Variable('str', None)
|
||||
ctx: Load
|
||||
]
|
||||
ctx: Load
|
||||
]
|
||||
value:
|
||||
Subscript: [20, 31] - [20, 41]
|
||||
value:
|
||||
Name: [20, 31] - [20, 35]
|
||||
variable: Variable('Spam', None)
|
||||
ctx: Load
|
||||
index:
|
||||
BinOp: [20, 36] - [20, 40]
|
||||
left:
|
||||
Name: [20, 36] - [20, 36]
|
||||
variable: Variable('', None)
|
||||
ctx: Load
|
||||
op: Pow
|
||||
right:
|
||||
Name: [20, 38] - [20, 40]
|
||||
variable: Variable('P2', None)
|
||||
ctx: Load
|
||||
ctx: Load
|
||||
TypeAlias: [21, 0] - [21, 41]
|
||||
name:
|
||||
Name: [21, 5] - [21, 9]
|
||||
|
||||
@@ -17,6 +17,6 @@ class Qux1[*Ts1 = *tuple[int, bool]]: ...
|
||||
|
||||
# TypeAliases
|
||||
type Foo2[T15, U1 = str] = Bar1[T15, U1]
|
||||
type Baz2[**P2 = [int, str]] = Spam[**P2]
|
||||
# type Baz2[**P2 = [int, str]] = Spam[**P2] # From the PEP, but this is not actually valid syntax!
|
||||
type Qux2[*Ts2 = *tuple[str]] = Ham[*Ts2]
|
||||
type Rab[U2, T15 = str] = Bar2[T15, U2]
|
||||
|
||||
52
python/extractor/tsg-python/Cargo.lock
generated
52
python/extractor/tsg-python/Cargo.lock
generated
@@ -1,12 +1,6 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0453232ace82dee0dd0b4c87a59bd90f7b53b314f3e0f61fe2ee7c8a16482289"
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
@@ -82,12 +76,6 @@ dependencies = [
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.30"
|
||||
@@ -121,15 +109,6 @@ version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
@@ -258,14 +237,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
|
||||
|
||||
[[package]]
|
||||
name = "string-interner"
|
||||
version = "0.12.2"
|
||||
name = "streaming-iterator"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "383196d1876517ee6f9f0864d1fc1070331b803335d3c6daaa04bbcccd823c08"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"hashbrown",
|
||||
]
|
||||
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
@@ -306,30 +281,39 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.20.4"
|
||||
version = "0.24.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e34327f8eac545e3f037382471b2b19367725a242bba7bc45edb9efb49fe39a"
|
||||
checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"streaming-iterator",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-graph"
|
||||
version = "0.7.0"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "639d21e886f581d293de5f5081f09af003c54607ff3fa85efa159b243ba1f97a"
|
||||
checksum = "63f86eb73c7d891c4b9b6fe4d4e63dd94c506e4788af7c2296afdcfbeea626cc"
|
||||
dependencies = [
|
||||
"log",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"smallvec",
|
||||
"string-interner",
|
||||
"streaming-iterator",
|
||||
"thiserror",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-language"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8"
|
||||
|
||||
[[package]]
|
||||
name = "tsg-python"
|
||||
version = "0.1.0"
|
||||
|
||||
@@ -10,7 +10,7 @@ edition = "2024"
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
regex = "1"
|
||||
tree-sitter = "=0.20.4"
|
||||
tree-sitter-graph = "0.7.0"
|
||||
tree-sitter = "=0.24.7"
|
||||
tree-sitter-graph = "0.12.0"
|
||||
tsp = {path = "tsp"}
|
||||
clap = "4.5"
|
||||
|
||||
@@ -6,16 +6,6 @@
|
||||
(module) @mod
|
||||
{ let @mod.node = (ast-node @mod "Module") }
|
||||
|
||||
(_) @anynode
|
||||
{
|
||||
scan (node-type @anynode) {
|
||||
"^(ERROR|MISSING)$" {
|
||||
let @anynode.node = (ast-node @anynode "SyntaxErrorNode")
|
||||
attr (@anynode.node) source = (source-text @anynode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(parenthesized_expression) @nd
|
||||
{ let @nd.node = (ast-node @nd "Expr") }
|
||||
|
||||
@@ -416,13 +406,13 @@
|
||||
attr (@if.node) _location_end = (location-end @expr)
|
||||
}
|
||||
|
||||
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
|
||||
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @_genexpr
|
||||
{
|
||||
attr (@child.node) _location_start = (location-start @start)
|
||||
attr (@child.node) _location_end = (location-end @end)
|
||||
}
|
||||
|
||||
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
|
||||
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @_genexpr
|
||||
{
|
||||
attr (@end.node) _location_start = (location-start @start)
|
||||
attr (@end.node) _location_end = (location-end @end)
|
||||
@@ -524,7 +514,7 @@
|
||||
attr (@del.node -> @target.node) targets = (named-child-index @target)
|
||||
}
|
||||
|
||||
(delete_statement target: (_) @target) @del
|
||||
(delete_statement target: (_) @target) @_del
|
||||
{
|
||||
attr (@target.node) ctx = "del"
|
||||
}
|
||||
@@ -798,8 +788,8 @@
|
||||
|
||||
(dictionary_comprehension
|
||||
body: (pair
|
||||
key: (_) @key
|
||||
value: (_) @value
|
||||
key: (_) @_key
|
||||
value: (_) @_value
|
||||
)
|
||||
) @genexpr
|
||||
{
|
||||
@@ -1299,7 +1289,7 @@
|
||||
; the index of the left-hand side of the current assignment.
|
||||
|
||||
; Base case, for the outermost assignment we set the outermost node to this node, and the index to zero.
|
||||
(expression_statement (assignment !type) @assign) @expr
|
||||
(expression_statement (assignment !type) @assign) @_expr
|
||||
{
|
||||
let @assign.outermost_assignment = @assign.node
|
||||
let @assign.target_index = 0
|
||||
@@ -1358,7 +1348,7 @@
|
||||
}
|
||||
|
||||
(assignment
|
||||
left: (_) @target
|
||||
left: (_) @_target
|
||||
type: (_)
|
||||
right: (_) @value
|
||||
) @assign
|
||||
@@ -2330,7 +2320,7 @@
|
||||
attr (@operand.node) ctx = "load"
|
||||
}
|
||||
|
||||
(unary_operator "~" @op) @unaryop
|
||||
(unary_operator "~" @_op) @unaryop
|
||||
{
|
||||
attr (@unaryop.node) op = "~"
|
||||
}
|
||||
@@ -2614,7 +2604,7 @@
|
||||
; Async status
|
||||
; NOTE: We only set the `is_async` field on the _first_ clause of the `with` statement,
|
||||
; as this is the behaviour of the old parser.
|
||||
(with_statement "async" "with" @with_keyword (with_clause . (with_item) @with))
|
||||
(with_statement "async" "with" @_with_keyword (with_clause . (with_item) @with))
|
||||
{
|
||||
attr (@with.node) is_async = #true
|
||||
}
|
||||
@@ -2800,7 +2790,7 @@
|
||||
(identifier) @obj
|
||||
.
|
||||
(identifier) @attr
|
||||
) @match_value_pattern
|
||||
) @_match_value_pattern
|
||||
{
|
||||
let attribute = (ast-node @attr "Attribute")
|
||||
attr (@attr.node) _skip_to = attribute
|
||||
@@ -2814,7 +2804,7 @@
|
||||
(match_value_pattern
|
||||
.
|
||||
(identifier) @id
|
||||
) @match_value_pattern
|
||||
) @_match_value_pattern
|
||||
{
|
||||
attr (@id.node) ctx = "load"
|
||||
}
|
||||
@@ -3267,8 +3257,8 @@
|
||||
(decorated_definition
|
||||
(decorator (expression) @exp1) @dec1
|
||||
. (comment)* .
|
||||
(decorator (expression) @exp2) @dec2
|
||||
) @decorator
|
||||
(decorator (expression) @_exp2) @dec2
|
||||
) @_decorator
|
||||
{
|
||||
attr (@dec1.node) func = @exp1.node
|
||||
edge @dec1.node -> @dec2.node
|
||||
@@ -3279,7 +3269,7 @@
|
||||
(decorator (expression) @exp) @last
|
||||
. (comment)* .
|
||||
definition: (function_definition) @funcdef
|
||||
) @decorator
|
||||
) @_decorator
|
||||
{
|
||||
attr (@last.node) func = @exp.node
|
||||
edge @last.node -> @funcdef.funcexpr
|
||||
@@ -3291,7 +3281,7 @@
|
||||
(decorator (expression) @exp) @last
|
||||
. (comment)* .
|
||||
definition: (class_definition) @class
|
||||
) @decorator
|
||||
) @_decorator
|
||||
{
|
||||
attr (@last.node) func = @exp.node
|
||||
edge @last.node -> @class.class_expr
|
||||
|
||||
@@ -480,6 +480,102 @@ pub mod extra_functions {
|
||||
}
|
||||
}
|
||||
|
||||
struct TreeIterator<'a> {
|
||||
nodes_to_visit: Vec<tree_sitter::Node<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> TreeIterator<'a> {
|
||||
fn new(root: tree_sitter::Node<'a>) -> Self {
|
||||
Self {
|
||||
nodes_to_visit: vec![root],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TreeIterator<'a> {
|
||||
type Item = tree_sitter::Node<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(node) = self.nodes_to_visit.pop() {
|
||||
// Add all children to the queue for processing
|
||||
let children: Vec<_> = (0..node.child_count())
|
||||
.rev()
|
||||
.filter_map(|i| node.child(i))
|
||||
.collect();
|
||||
self.nodes_to_visit.extend(children);
|
||||
Some(node)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct SyntaxError {
|
||||
start_pos: tree_sitter::Point,
|
||||
end_pos: tree_sitter::Point,
|
||||
source: String,
|
||||
}
|
||||
|
||||
fn syntax_errors_from_tree<'a>(
|
||||
root: tree_sitter::Node<'a>,
|
||||
source: &'a str,
|
||||
) -> impl Iterator<Item = SyntaxError> + 'a {
|
||||
TreeIterator::new(root)
|
||||
.filter(|&node| node.is_error() || node.is_missing())
|
||||
.map(move |node| {
|
||||
let start_pos = node.start_position();
|
||||
let end_pos = node.end_position();
|
||||
let text = &source.get(node.byte_range()).unwrap_or("");
|
||||
SyntaxError {
|
||||
start_pos,
|
||||
end_pos,
|
||||
source: text.to_string(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn add_syntax_error_nodes(graph: &mut tree_sitter_graph::graph::Graph, errors: &[SyntaxError]) {
|
||||
for error in errors {
|
||||
let error_node = graph.add_graph_node();
|
||||
|
||||
// Add _kind attribute
|
||||
graph[error_node]
|
||||
.attributes
|
||||
.add(
|
||||
tree_sitter_graph::Identifier::from("_kind"),
|
||||
tree_sitter_graph::graph::Value::String("SyntaxErrorNode".to_string()),
|
||||
)
|
||||
.expect("Fresh node should not have duplicate attributes");
|
||||
|
||||
// Add _location attribute
|
||||
let location = tree_sitter_graph::graph::Value::List(
|
||||
vec![
|
||||
error.start_pos.row,
|
||||
error.start_pos.column,
|
||||
error.end_pos.row,
|
||||
error.end_pos.column,
|
||||
]
|
||||
.into_iter()
|
||||
.map(|v| tree_sitter_graph::graph::Value::from(v as u32))
|
||||
.collect(),
|
||||
);
|
||||
graph[error_node]
|
||||
.attributes
|
||||
.add(tree_sitter_graph::Identifier::from("_location"), location)
|
||||
.expect("Fresh node should not have duplicate attributes");
|
||||
|
||||
// Add source attribute
|
||||
graph[error_node]
|
||||
.attributes
|
||||
.add(
|
||||
tree_sitter_graph::Identifier::from("source"),
|
||||
tree_sitter_graph::graph::Value::String(error.source.clone()),
|
||||
)
|
||||
.expect("Fresh node should not have duplicate attributes");
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let matches = Command::new("tsg-python")
|
||||
.version(BUILD_VERSION)
|
||||
@@ -502,7 +598,7 @@ fn main() -> Result<()> {
|
||||
let source_path = Path::new(matches.get_one::<String>("source").unwrap());
|
||||
let language = tsp::language();
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language)?;
|
||||
parser.set_language(&language)?;
|
||||
// Statically include `python.tsg`:
|
||||
let tsg = if matches.contains_id("tsg") {
|
||||
std::fs::read(&tsg_path).with_context(|| format!("Error reading TSG file {}", tsg_path))?
|
||||
@@ -581,10 +677,18 @@ fn main() -> Result<()> {
|
||||
);
|
||||
|
||||
let globals = Variables::new();
|
||||
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
|
||||
let graph = file
|
||||
.execute(&tree, &source, &mut config, &NoCancellation)
|
||||
let config = ExecutionConfig::new(&functions, &globals).lazy(false);
|
||||
let mut graph = file
|
||||
.execute(&tree, &source, &config, &NoCancellation)
|
||||
.with_context(|| format!("Could not execute TSG file {}", tsg_path))?;
|
||||
|
||||
// Collect and add syntax error nodes to the graph
|
||||
if tree.root_node().has_error() {
|
||||
let syntax_errors: Vec<SyntaxError> =
|
||||
syntax_errors_from_tree(tree.root_node(), &source).collect();
|
||||
add_syntax_error_nodes(&mut graph, &syntax_errors);
|
||||
}
|
||||
|
||||
print!("{}", graph.pretty_print());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ path = "bindings/rust/lib.rs"
|
||||
|
||||
## When updating these dependencies, run `misc/bazel/3rdparty/update_cargo_deps.sh`
|
||||
[dependencies]
|
||||
tree-sitter = ">= 0.20, < 0.21"
|
||||
tree-sitter = "=0.24.7"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.2"
|
||||
|
||||
Reference in New Issue
Block a user