mirror of
https://github.com/github/codeql.git
synced 2025-12-18 01:33:15 +01:00
Python: Copy Python extractor to codeql repo
This commit is contained in:
572
python/extractor/tsg-python/src/main.rs
Normal file
572
python/extractor/tsg-python/src/main.rs
Normal file
@@ -0,0 +1,572 @@
|
||||
// -*- coding: utf-8 -*-
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Copyright © 2021, GitHub.
|
||||
// Licensed under either of Apache License, Version 2.0, or MIT license, at your option.
|
||||
// Please see the LICENSE-APACHE or LICENSE-MIT files in this distribution for license details.
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use anyhow::Context as _;
|
||||
use anyhow::Result;
|
||||
use clap::App;
|
||||
use clap::Arg;
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter_graph::ast::File;
|
||||
use tree_sitter_graph::functions::Functions;
|
||||
use tree_sitter_graph::ExecutionConfig;
|
||||
use tree_sitter_graph::Identifier;
|
||||
use tree_sitter_graph::NoCancellation;
|
||||
use tree_sitter_graph::Variables;
|
||||
|
||||
const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
pub mod extra_functions {
|
||||
use tree_sitter_graph::functions::{Function, Parameters};
|
||||
use tree_sitter_graph::graph::{Graph, Value};
|
||||
use tree_sitter_graph::{ExecutionError, Identifier};
|
||||
|
||||
pub struct Location;
|
||||
|
||||
fn get_location(node: Value, graph: &Graph) -> Result<Value, ExecutionError> {
|
||||
let node = graph[node.into_syntax_node_ref()?];
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
Ok(Value::List(
|
||||
vec![start.row, start.column, end.row, end.column]
|
||||
.into_iter()
|
||||
.map(|v| Value::from(v as u32))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
|
||||
impl Function for Location {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = parameters.param()?;
|
||||
parameters.finish()?;
|
||||
get_location(node, graph)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LocationStart;
|
||||
|
||||
impl Function for LocationStart {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let start = node.start_position();
|
||||
Ok(Value::List(
|
||||
vec![start.row, start.column]
|
||||
.into_iter()
|
||||
.map(|v| Value::from(v as u32))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LocationEnd;
|
||||
|
||||
impl Function for LocationEnd {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let end = node.end_position();
|
||||
Ok(Value::List(
|
||||
vec![end.row, end.column]
|
||||
.into_iter()
|
||||
.map(|v| Value::from(v as u32))
|
||||
.collect(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AstNode;
|
||||
|
||||
impl Function for AstNode {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let tree_sitter_node = parameters.param()?;
|
||||
let kind = parameters.param()?;
|
||||
parameters.finish()?;
|
||||
let node = graph.add_graph_node();
|
||||
let loc = get_location(tree_sitter_node, graph)?;
|
||||
graph[node]
|
||||
.attributes
|
||||
.add(Identifier::from("_location"), loc)
|
||||
.map_err(|_| {
|
||||
ExecutionError::DuplicateAttribute(format!(
|
||||
" _location on graph node ({:?})",
|
||||
node
|
||||
))
|
||||
})?;
|
||||
graph[node]
|
||||
.attributes
|
||||
.add(Identifier::from("_kind"), kind)
|
||||
.map_err(|_| {
|
||||
ExecutionError::DuplicateAttribute(format!(" _kind on graph node ({:?})", node))
|
||||
})?;
|
||||
Ok(Value::GraphNode(node))
|
||||
}
|
||||
}
|
||||
|
||||
/// A struct representing the prefix on a Python string.
|
||||
struct Prefix {
|
||||
flags: String,
|
||||
quotes: String,
|
||||
}
|
||||
|
||||
impl Prefix {
|
||||
fn full(&self) -> String {
|
||||
format!("{}{}", self.flags, self.quotes)
|
||||
}
|
||||
|
||||
fn safe(&self) -> Prefix {
|
||||
Prefix {
|
||||
flags: self.flags.clone().replace("f", "").replace("F", ""),
|
||||
quotes: self.quotes.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_prefix(s: &str) -> Prefix {
|
||||
let flags_matcher = regex::Regex::new("^[bfurBFUR]{0,2}").unwrap();
|
||||
let mut end = 0;
|
||||
let flags = match flags_matcher.find(s) {
|
||||
Some(m) => {
|
||||
end = m.end();
|
||||
&s[m.start()..m.end()]
|
||||
}
|
||||
None => "",
|
||||
};
|
||||
let mut quotes = "";
|
||||
if s[end..].starts_with("\"\"\"") {
|
||||
quotes = "\"\"\"";
|
||||
} else if s[end..].starts_with("'''") {
|
||||
quotes = "'''";
|
||||
} else if s[end..].starts_with('"') {
|
||||
quotes = "\"";
|
||||
} else if s[end..].starts_with('\'') {
|
||||
quotes = "'";
|
||||
} else if s[end..].starts_with('}') {
|
||||
quotes = "}";
|
||||
}
|
||||
Prefix {
|
||||
flags: flags.to_lowercase().to_owned(),
|
||||
quotes: quotes.to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_prefix() {
|
||||
let p = get_prefix("rb'''hello'''");
|
||||
assert_eq!(p.flags, "rb");
|
||||
assert_eq!(p.quotes, "'''");
|
||||
let p = get_prefix("Br\"\"\"hello\"\"\"");
|
||||
assert_eq!(p.flags, "Br");
|
||||
assert_eq!(p.quotes, "\"\"\"");
|
||||
let p = get_prefix("FR\"hello\"");
|
||||
assert_eq!(p.flags, "FR");
|
||||
assert_eq!(p.quotes, "\"");
|
||||
let p = get_prefix("uR'hello'");
|
||||
assert_eq!(p.flags, "uR");
|
||||
assert_eq!(p.quotes, "'");
|
||||
let p = get_prefix("''");
|
||||
assert_eq!(p.flags, "");
|
||||
assert_eq!(p.quotes, "'");
|
||||
let p = get_prefix("\"\"");
|
||||
assert_eq!(p.flags, "");
|
||||
assert_eq!(p.quotes, "\"");
|
||||
let p = get_prefix("\"\"\"\"\"\"");
|
||||
assert_eq!(p.flags, "");
|
||||
assert_eq!(p.quotes, "\"\"\"");
|
||||
}
|
||||
|
||||
fn get_string_contents(s: String) -> String {
|
||||
let prefix = get_prefix(&s);
|
||||
let contents = s.clone();
|
||||
let contents = contents.strip_prefix(prefix.full().as_str()).unwrap();
|
||||
let contents = contents.strip_suffix(prefix.quotes.as_str()).unwrap();
|
||||
|
||||
contents.to_owned()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_string_contents() {
|
||||
let s = "rb'''hello'''";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "hello");
|
||||
let s = "Br\"\"\"hello\"\"\"";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "hello");
|
||||
let s = "FR\"hello\"";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "hello");
|
||||
let s = "uR'hello'";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "hello");
|
||||
let s = "''";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "");
|
||||
let s = "\"\"";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "");
|
||||
let s = "\"\"\"\"\"\"";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "");
|
||||
let s = "''''''";
|
||||
assert_eq!(get_string_contents(s.to_owned()), "");
|
||||
}
|
||||
|
||||
pub struct StringPrefix;
|
||||
|
||||
impl Function for StringPrefix {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]).full();
|
||||
Ok(Value::String(prefix))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StringContents;
|
||||
|
||||
impl Function for StringContents {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let contents = get_string_contents(source[node.byte_range()].to_owned());
|
||||
Ok(Value::String(contents))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StringQuotes;
|
||||
|
||||
impl Function for StringQuotes {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]);
|
||||
Ok(Value::String(prefix.quotes))
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a version of the prefix that can be used in a call to `literal_eval`. To do so, we must remove
|
||||
// any `f` or `F` characters, if present.
|
||||
pub struct StringSafePrefix;
|
||||
|
||||
impl Function for StringSafePrefix {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]).full();
|
||||
let prefix = prefix.replace("f", "").replace("F", "");
|
||||
Ok(Value::String(prefix))
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a version of the string where `f` and `F` have been stripped from the prefix.
|
||||
pub struct SafeString;
|
||||
|
||||
impl Function for SafeString {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let prefix = get_prefix(&source[node.byte_range()]);
|
||||
let contents = get_string_contents(source[node.byte_range()].to_owned());
|
||||
let s = format!("{}{}{}", prefix.safe().full(), contents, prefix.quotes);
|
||||
Ok(Value::String(s))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UnnamedChildIndex;
|
||||
|
||||
impl Function for UnnamedChildIndex {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let parent = match node.parent() {
|
||||
Some(parent) => parent,
|
||||
None => {
|
||||
return Err(ExecutionError::FunctionFailed(
|
||||
"unnamed-child-index".into(),
|
||||
format!("Cannot call child-index on the root node"),
|
||||
))
|
||||
}
|
||||
};
|
||||
let mut tree_cursor = parent.walk();
|
||||
let index = parent
|
||||
.children(&mut tree_cursor)
|
||||
.position(|child| child == node)
|
||||
.ok_or_else(|| {
|
||||
ExecutionError::FunctionFailed(
|
||||
"unnamed-child-index".into(),
|
||||
format!("Called child-index on a non-named child"),
|
||||
)
|
||||
})?;
|
||||
Ok(Value::Integer(index as u32))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ConcatenateStrings;
|
||||
|
||||
impl Function for ConcatenateStrings {
|
||||
fn call(
|
||||
&self,
|
||||
_graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let mut result = String::new();
|
||||
while let Ok(param) = parameters.param() {
|
||||
let string = param.into_string()?;
|
||||
result.push_str(string.as_str());
|
||||
}
|
||||
Ok(Value::String(result))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct InstanceOf;
|
||||
|
||||
impl Function for InstanceOf {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
let class_name = parameters.param()?.into_string()?;
|
||||
parameters.finish()?;
|
||||
let node_type = node.kind();
|
||||
let class_name = class_name.as_str();
|
||||
let is_instance = node_type == class_name;
|
||||
Ok(Value::Boolean(is_instance))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct GetParent;
|
||||
|
||||
impl Function for GetParent {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
parameters.finish()?;
|
||||
let parent = node.parent().ok_or_else(|| {
|
||||
ExecutionError::FunctionFailed(
|
||||
"get-parent".into(),
|
||||
format!("Cannot call get-parent on the root node"),
|
||||
)
|
||||
})?;
|
||||
Ok(Value::SyntaxNode(graph.add_syntax_node(parent)))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HasNamedChild;
|
||||
|
||||
impl Function for HasNamedChild {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
let field_name = parameters.param()?.into_string()?;
|
||||
parameters.finish()?;
|
||||
let field_name = field_name.as_str();
|
||||
let has_named_child = node.child_by_field_name(field_name).is_some();
|
||||
Ok(Value::Boolean(has_named_child))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IsBooleanOperator;
|
||||
|
||||
impl Function for IsBooleanOperator {
|
||||
fn call(
|
||||
&self,
|
||||
graph: &mut Graph,
|
||||
source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let node = graph[parameters.param()?.into_syntax_node_ref()?];
|
||||
let expected_op_type = parameters.param()?.into_string()?;
|
||||
parameters.finish()?;
|
||||
if let Some(op) = node.child_by_field_name("operator") {
|
||||
let op_type = source[op.byte_range()].to_string();
|
||||
let is_boolean_op = expected_op_type == op_type;
|
||||
Ok(Value::Boolean(is_boolean_op))
|
||||
} else {
|
||||
Ok(Value::Boolean(false))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Modulo;
|
||||
|
||||
impl Function for Modulo {
|
||||
fn call(
|
||||
&self,
|
||||
_graph: &mut Graph,
|
||||
_source: &str,
|
||||
parameters: &mut dyn Parameters,
|
||||
) -> Result<Value, ExecutionError> {
|
||||
let left = parameters.param()?.into_integer()?;
|
||||
let right = parameters.param()?.into_integer()?;
|
||||
parameters.finish()?;
|
||||
Ok(Value::Integer(left % right))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let matches = App::new("tsg-python")
|
||||
.version(BUILD_VERSION)
|
||||
.author("Taus Brock-Nannestad <tausbn@github.com>")
|
||||
.about("Extracts a Python AST from the parse tree given by tree-sitter-python")
|
||||
.arg(
|
||||
Arg::with_name("tsg")
|
||||
.short("t")
|
||||
.long("tsg")
|
||||
.takes_value(true)
|
||||
.required(false),
|
||||
)
|
||||
.arg(Arg::with_name("source").index(1).required(true))
|
||||
.get_matches();
|
||||
|
||||
let tsg_path = if matches.is_present("tsg") {
|
||||
Path::new(matches.value_of("tsg").unwrap())
|
||||
.display()
|
||||
.to_string()
|
||||
} else {
|
||||
"bundled `python.tsg`".to_owned()
|
||||
};
|
||||
let source_path = Path::new(matches.value_of("source").unwrap());
|
||||
let language = tree_sitter_python::language();
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language)?;
|
||||
// Statically include `python.tsg`:
|
||||
let tsg = if matches.is_present("tsg") {
|
||||
std::fs::read(&tsg_path).with_context(|| format!("Error reading TSG file {}", tsg_path))?
|
||||
} else {
|
||||
include_bytes!("../python.tsg").to_vec()
|
||||
};
|
||||
let tsg = String::from_utf8(tsg)?;
|
||||
let source = std::fs::read(source_path)
|
||||
.with_context(|| format!("Error reading source file {}", source_path.display()))?;
|
||||
let source = String::from_utf8(source)?;
|
||||
let tree = parser
|
||||
.parse(&source, None)
|
||||
.ok_or_else(|| anyhow!("Could not parse {}", source_path.display()))?;
|
||||
let file = File::from_str(language, &tsg)
|
||||
.with_context(|| anyhow!("Error parsing TSG file {}", tsg_path))?;
|
||||
let mut functions = Functions::stdlib();
|
||||
functions.add(Identifier::from("location"), extra_functions::Location);
|
||||
functions.add(
|
||||
Identifier::from("location-start"),
|
||||
extra_functions::LocationStart,
|
||||
);
|
||||
functions.add(
|
||||
Identifier::from("location-end"),
|
||||
extra_functions::LocationEnd,
|
||||
);
|
||||
functions.add(
|
||||
Identifier::from("string-prefix"),
|
||||
extra_functions::StringPrefix,
|
||||
);
|
||||
functions.add(
|
||||
Identifier::from("string-contents"),
|
||||
extra_functions::StringContents,
|
||||
);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("string-quotes"),
|
||||
extra_functions::StringQuotes,
|
||||
);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("string-safe-prefix"),
|
||||
extra_functions::StringSafePrefix,
|
||||
);
|
||||
|
||||
functions.add(Identifier::from("safe-string"), extra_functions::SafeString);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("unnamed-child-index"),
|
||||
extra_functions::UnnamedChildIndex,
|
||||
);
|
||||
functions.add(Identifier::from("ast-node"), extra_functions::AstNode);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("concatenate-strings"),
|
||||
extra_functions::ConcatenateStrings,
|
||||
);
|
||||
|
||||
functions.add(Identifier::from("instance-of"), extra_functions::InstanceOf);
|
||||
|
||||
functions.add(Identifier::from("get-parent"), extra_functions::GetParent);
|
||||
|
||||
functions.add(
|
||||
Identifier::from("has-named-child"),
|
||||
extra_functions::HasNamedChild,
|
||||
);
|
||||
functions.add(
|
||||
Identifier::from("is-boolean-operator"),
|
||||
extra_functions::IsBooleanOperator,
|
||||
);
|
||||
|
||||
functions.add(Identifier::from("mod"), extra_functions::Modulo);
|
||||
let globals = Variables::new();
|
||||
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
|
||||
let graph = file
|
||||
.execute(&tree, &source, &mut config, &NoCancellation)
|
||||
.with_context(|| format!("Could not execute TSG file {}", tsg_path))?;
|
||||
print!("{}", graph.pretty_print());
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user