Fix location handling to match common db schema requirements

This commit is contained in:
Nick Rolfe
2020-10-29 19:41:41 +00:00
parent 556507cec7
commit 4d5d80c749
6 changed files with 361 additions and 30 deletions

View File

@@ -34,15 +34,31 @@ impl Extractor {
.parser
.parse(&source, None)
.expect("Failed to parse file");
let mut counter = -1;
// Create a label for the current file and increment the counter so that
// label doesn't get redefined.
counter += 1;
let file_label = Label::Normal(counter);
let mut visitor = Visitor {
source: &source,
trap_output: vec![TrapEntry::Comment(format!(
"Auto-generated TRAP file for {}",
path.display()
))],
counter: -1,
trap_output: vec![
TrapEntry::Comment(format!("Auto-generated TRAP file for {}", path.display())),
TrapEntry::New(file_label),
TrapEntry::GenericTuple(
"files".to_owned(),
vec![
Arg::Label(file_label),
Arg::String(format!("{}", path.canonicalize()?.display())),
Arg::String(format!("{}", path.file_name().unwrap().to_string_lossy())),
Arg::String(format!("{}", path.extension().unwrap().to_string_lossy())),
Arg::Int(0), // 0 = unknown
],
),
],
counter,
// TODO: should we handle path strings that are not valid UTF8 better?
path: format!("{}", path.display()),
file_label,
stack: Vec::new(),
tables: build_schema_lookup(&self.schema),
union_types: build_union_type_lookup(&self.schema),
@@ -77,6 +93,9 @@ fn build_union_type_lookup<'a>(schema: &'a Vec<Entry>) -> Map<&'a TypeName, &'a
struct Visitor<'a> {
/// The file path of the source code (as string)
path: String,
/// The label to use whenever we need to refer to the `@file` entity of this
/// source file.
file_label: Label,
/// The source code as a UTF-8 byte array
source: &'a Vec<u8>,
/// The accumulated trap entries
@@ -135,7 +154,7 @@ impl Visitor<'_> {
self.trap_output.push(TrapEntry::New(id));
self.trap_output.push(TrapEntry::New(loc));
self.trap_output
.push(location_for(&self.source, &self.path, loc, node));
.push(location_for(&self.source, &self.file_label, loc, node));
let table_name = node_type_name(node.kind(), node.is_named());
let args: Option<Vec<Arg>>;
if fields.is_empty() {
@@ -283,7 +302,7 @@ fn sliced_source_arg(source: &Vec<u8>, n: Node) -> Arg {
}
// Emit a 'Located' TrapEntry for the provided node, appropriately calibrated.
fn location_for<'a>(source: &Vec<u8>, fp: &String, label: Label, n: Node) -> TrapEntry {
fn location_for<'a>(source: &Vec<u8>, file_label: &Label, label: Label, n: Node) -> TrapEntry {
// Tree-sitter row, column values are 0-based while CodeQL starts
// counting at 1. In addition Tree-sitter's row and column for the
// end position are exclusive while CodeQL's end positions are inclusive.
@@ -327,7 +346,7 @@ fn location_for<'a>(source: &Vec<u8>, fp: &String, label: Label, n: Node) -> Tra
}
TrapEntry::Located(vec![
Arg::Label(label),
Arg::String(fp.to_owned()),
Arg::Label(file_label.clone()),
Arg::Int(start_line),
Arg::Int(start_col),
Arg::Int(end_line),
@@ -377,6 +396,8 @@ enum TrapEntry {
ChildOf(String, Label, String, Option<Index>, Label),
// @location(loc, file, r1, c1, r2, c2) -- `file` is the label of the `@file` entity
Located(Vec<Arg>),
/// foo_bar(arg, arg, ...): a tuple for the named table with any number of arguments
GenericTuple(String, Vec<Arg>),
Comment(String),
}
impl fmt::Display for TrapEntry {
@@ -424,6 +445,16 @@ impl fmt::Display for TrapEntry {
args.get(4).unwrap(),
args.get(5).unwrap(),
),
TrapEntry::GenericTuple(name, args) => {
write!(f, "{}(", name)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
TrapEntry::Comment(line) => write!(f, "// {}", line),
}
}

View File

@@ -113,7 +113,14 @@ fn add_field(
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<dbscheme::Entry> {
let mut entries: Vec<dbscheme::Entry> = vec![
create_location_table(),
create_location_union(),
create_locations_default_table(),
create_sourceline_union(),
create_numlines_table(),
create_files_table(),
create_folders_table(),
create_container_union(),
create_containerparent_table(),
create_source_location_prefix_table(),
];
let mut top_members: Vec<String> = Vec::new();
@@ -212,23 +219,103 @@ fn write_dbscheme(language: &Language, entries: &[dbscheme::Entry]) -> std::io::
dbscheme::write(&language.name, &mut file, &entries)
}
fn create_location_table() -> dbscheme::Entry {
/// Builds the `location` union type, whose single member is `location_default`.
fn create_location_union() -> dbscheme::Entry {
    let location_union = dbscheme::Union {
        name: String::from("location"),
        members: vec![String::from("location_default")],
    };
    dbscheme::Entry::Union(location_union)
}
/// Builds the dbscheme entry for the `files` table: one `@file` id column
/// followed by three string columns (`name`, `simple`, `ext`) and one int
/// column (`fromSource`).
fn create_files_table() -> dbscheme::Entry {
dbscheme::Entry::Table(dbscheme::Table {
// NOTE(review): two `name` initializers appear back to back here; presumably
// the "location" line is the removed side of the diff and only "files" is
// live -- confirm against the real file.
name: "location".to_string(),
name: "files".to_owned(),
keysets: None,
columns: vec![
// Defining column: each row introduces a new `@file` entity.
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "id".to_owned(),
ql_type: ql::Type::AtType("file".to_owned()),
ql_type_is_ref: false,
},
// The extractor's `files` tuple stores the canonicalized path here.
dbscheme::Column {
db_type: dbscheme::DbColumnType::String,
name: "name".to_owned(),
unique: false,
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
// The extractor's `files` tuple stores the final path component here.
dbscheme::Column {
db_type: dbscheme::DbColumnType::String,
name: "simple".to_owned(),
unique: false,
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
// The extractor's `files` tuple stores the file extension here.
dbscheme::Column {
db_type: dbscheme::DbColumnType::String,
name: "ext".to_owned(),
unique: false,
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
// The extractor currently always writes 0, which it annotates as "unknown".
dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "fromSource".to_owned(),
unique: false,
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
],
})
}
/// Builds the dbscheme entry for the `folders` table: a unique `@folder` id
/// column followed by the string columns `name` and `simple`.
fn create_folders_table() -> dbscheme::Entry {
    // The two text columns are identical apart from their names.
    let string_column = |column_name: &str| dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::String,
        name: column_name.to_owned(),
        ql_type: ql::Type::String,
        ql_type_is_ref: true,
    };
    // Defining column: each row introduces a new `@folder` entity.
    let id_column = dbscheme::Column {
        unique: true,
        db_type: dbscheme::DbColumnType::Int,
        name: "id".to_owned(),
        ql_type: ql::Type::AtType("folder".to_owned()),
        ql_type_is_ref: false,
    };
    let folders = dbscheme::Table {
        name: "folders".to_owned(),
        keysets: None,
        columns: vec![id_column, string_column("name"), string_column("simple")],
    };
    dbscheme::Entry::Table(folders)
}
fn create_locations_default_table() -> dbscheme::Entry {
dbscheme::Entry::Table(dbscheme::Table {
name: "locations_default".to_string(),
keysets: None,
columns: vec![
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "id".to_string(),
ql_type: ql::Type::AtType("location".to_string()),
ql_type: ql::Type::AtType("location_default".to_string()),
ql_type_is_ref: false,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "file_path".to_string(),
ql_type: ql::Type::String,
db_type: dbscheme::DbColumnType::Int,
name: "file".to_string(),
ql_type: ql::Type::AtType("file".to_owned()),
ql_type_is_ref: true,
},
dbscheme::Column {
@@ -263,6 +350,80 @@ fn create_location_table() -> dbscheme::Entry {
})
}
/// Builds the `sourceline` union type, whose single member is `file`.
fn create_sourceline_union() -> dbscheme::Entry {
    let sourceline_union = dbscheme::Union {
        name: String::from("sourceline"),
        members: vec![String::from("file")],
    };
    dbscheme::Entry::Union(sourceline_union)
}
/// Builds the dbscheme entry for the `numlines` table: an `element_id` column
/// referencing `@sourceline`, followed by the int columns `num_lines`,
/// `num_code` and `num_comment`.
fn create_numlines_table() -> dbscheme::Entry {
    // Reference to the element the counts belong to.
    let element_column = dbscheme::Column {
        unique: false,
        db_type: dbscheme::DbColumnType::Int,
        name: "element_id".to_string(),
        ql_type: ql::Type::AtType("sourceline".to_owned()),
        ql_type_is_ref: true,
    };
    // The three counters only differ in their column name.
    let counter_columns = ["num_lines", "num_code", "num_comment"]
        .iter()
        .map(|&counter_name| dbscheme::Column {
            unique: false,
            db_type: dbscheme::DbColumnType::Int,
            name: counter_name.to_string(),
            ql_type: ql::Type::Int,
            ql_type_is_ref: true,
        });

    let mut columns = vec![element_column];
    columns.extend(counter_columns);

    dbscheme::Entry::Table(dbscheme::Table {
        name: "numlines".to_owned(),
        columns,
        keysets: None,
    })
}
/// Builds the `container` union type with the members `folder` and `file`
/// (in that order).
fn create_container_union() -> dbscheme::Entry {
    let members = vec![String::from("folder"), String::from("file")];
    dbscheme::Entry::Union(dbscheme::Union {
        name: String::from("container"),
        members,
    })
}
/// Builds the dbscheme entry for the `containerparent` table, which pairs a
/// `parent` container with a `child` container. Only `child` is marked unique.
fn create_containerparent_table() -> dbscheme::Entry {
    // Both columns reference `@container`; only name and uniqueness differ.
    let container_ref = |column_name: &str, is_unique: bool| dbscheme::Column {
        unique: is_unique,
        db_type: dbscheme::DbColumnType::Int,
        name: column_name.to_string(),
        ql_type: ql::Type::AtType("container".to_owned()),
        ql_type_is_ref: true,
    };
    let columns = vec![container_ref("parent", false), container_ref("child", true)];
    dbscheme::Entry::Table(dbscheme::Table {
        name: "containerparent".to_owned(),
        columns,
        keysets: None,
    })
}
fn create_source_location_prefix_table() -> dbscheme::Entry {
dbscheme::Entry::Table(dbscheme::Table {
name: "sourceLocationPrefix".to_string(),

View File

@@ -79,7 +79,10 @@ pub enum Expression {
String(String),
Pred(String, Vec<Expression>),
Or(Vec<Expression>),
And(Vec<Expression>),
Equals(Box<Expression>, Box<Expression>),
Exists(Vec<FormalParameter>, Box<Expression>),
Dot(Box<Expression>, String, Vec<Expression>),
}
impl fmt::Display for Expression {
@@ -110,7 +113,40 @@ impl fmt::Display for Expression {
Ok(())
}
}
Expression::And(conjuncts) => {
if conjuncts.is_empty() {
write!(f, "any()")
} else {
for (index, disjunct) in conjuncts.iter().enumerate() {
if index > 0 {
write!(f, " and ")?;
}
write!(f, "{}", disjunct)?;
}
Ok(())
}
}
Expression::Equals(a, b) => write!(f, "{} = {}", a, b),
Expression::Exists(params, formula) => {
write!(f, "exists(")?;
for (index, param) in params.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", param)?;
}
write!(f, " | {})", formula)
}
Expression::Dot(x, member_pred, args) => {
write!(f, "{}.{}(", x, member_pred)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
}
}
}

View File

@@ -130,6 +130,48 @@ fn create_none_predicate(
}
}
/// Creates the hard-coded `File` class, which extends `@file` and exposes two
/// non-overriding member predicates: `getAbsolutePath` and `toString`.
fn create_file_class() -> ql::Class {
    // Shorthand for a variable reference expression.
    let var = |name: &str| ql::Expression::Var(name.to_owned());

    // string getAbsolutePath() { files(this, result, _, _, _) }
    let absolute_path_getter = ql::Predicate {
        name: "getAbsolutePath".to_owned(),
        overridden: false,
        return_type: Some(ql::Type::String),
        formal_parameters: vec![],
        body: ql::Expression::Pred(
            "files".to_owned(),
            vec![var("this"), var("result"), var("_"), var("_"), var("_")],
        ),
    };

    // string toString() { result = this.getAbsolutePath() }
    let absolute_path_call = ql::Expression::Dot(
        Box::new(var("this")),
        "getAbsolutePath".to_owned(),
        vec![],
    );
    let to_string_predicate = ql::Predicate {
        name: "toString".to_owned(),
        overridden: false,
        return_type: Some(ql::Type::String),
        formal_parameters: vec![],
        body: ql::Expression::Equals(Box::new(var("result")), Box::new(absolute_path_call)),
    };

    ql::Class {
        name: "File".to_owned(),
        is_abstract: false,
        supertypes: vec![ql::Type::AtType("file".to_owned())],
        characteristic_predicate: None,
        predicates: vec![absolute_path_getter, to_string_predicate],
    }
}
/// Creates the special `Location` class to wrap the location table.
fn create_location_class() -> ql::Class {
let to_string = ql::Predicate {
@@ -168,16 +210,32 @@ fn create_location_class() -> ql::Class {
param_type: ql::Type::Int,
},
],
body: ql::Expression::Pred(
"location".to_owned(),
vec![
ql::Expression::Var("this".to_owned()),
ql::Expression::Var("filePath".to_owned()),
ql::Expression::Var("startLine".to_owned()),
ql::Expression::Var("startColumn".to_owned()),
ql::Expression::Var("endLine".to_owned()),
ql::Expression::Var("endColumn".to_owned()),
],
body: ql::Expression::Exists(
vec![ql::FormalParameter {
param_type: ql::Type::Normal("File".to_owned()),
name: "f".to_owned(),
}],
Box::new(ql::Expression::And(vec![
ql::Expression::Pred(
"locations_default".to_owned(),
vec![
ql::Expression::Var("this".to_owned()),
ql::Expression::Var("f".to_owned()),
ql::Expression::Var("startLine".to_owned()),
ql::Expression::Var("startColumn".to_owned()),
ql::Expression::Var("endLine".to_owned()),
ql::Expression::Var("endColumn".to_owned()),
],
),
ql::Expression::Equals(
Box::new(ql::Expression::Var("filePath".to_owned())),
Box::new(ql::Expression::Dot(
Box::new(ql::Expression::Var("f".to_owned())),
"getAbsolutePath".to_owned(),
vec![],
)),
),
])),
),
};
ql::Class {
@@ -441,7 +499,11 @@ fn create_field_getters(
/// Converts the given node types into CodeQL classes wrapping the dbscheme.
pub fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<ql::Class> {
let supertype_map = create_supertype_map(nodes);
let mut classes: Vec<ql::Class> = vec![create_location_class(), create_top_class()];
let mut classes: Vec<ql::Class> = vec![
create_file_class(),
create_location_class(),
create_top_class(),
];
for node in nodes {
match &node {

View File

@@ -3,13 +3,22 @@
* Automatically generated from the tree-sitter grammar; do not edit
*/
/** A file entity, i.e. a row of the `files` relation. */
class File extends @file {
/** Gets the value of the second (`name`) column of this file's `files` row. */
string getAbsolutePath() { files(this, result, _, _, _) }
/** Gets a textual representation of this file, namely its absolute path. */
string toString() { result = this.getAbsolutePath() }
}
class Location extends @location {
string toString() { result = "Location" }
predicate hasLocationInfo(
string filePath, int startLine, int startColumn, int endLine, int endColumn
) {
location(this, filePath, startLine, startColumn, endLine, endColumn)
exists(File f |
locations_default(this, f, startLine, startColumn, endLine, endColumn) and
filePath = f.getAbsolutePath()
)
}
}

View File

@@ -1,15 +1,47 @@
// CodeQL database schema for Ruby
// Automatically generated from the tree-sitter grammar; do not edit
location(
unique int id: @location,
string file_path: string ref,
@location = @location_default
// One row per location. `file` refers to the @file entity of the containing
// source file rather than to a path string. Per the extractor's own comments,
// tree-sitter's 0-based positions with exclusive ends are converted to
// 1-based positions with inclusive ends before being written here.
locations_default(
unique int id: @location_default,
int file: @file ref,
int start_line: int ref,
int start_column: int ref,
int end_line: int ref,
int end_column: int ref
);
@sourceline = @file
// Three integer counts attached to a @sourceline element.
// NOTE(review): no extractor code visible in this change emits rows for this
// table -- confirm where (or whether) it is populated.
numlines(
int element_id: @sourceline ref,
int num_lines: int ref,
int num_code: int ref,
int num_comment: int ref
);
// One row per extracted source file. The extractor fills the columns as:
//   name       - the canonicalized path of the file
//   simple     - the final path component (file name)
//   ext        - the file extension
//   fromSource - currently always 0, which the extractor annotates as "unknown"
files(
unique int id: @file,
string name: string ref,
string simple: string ref,
string ext: string ref,
int fromSource: int ref
);
// Folder entities, with the same `name`/`simple` string columns as `files`.
// NOTE(review): no extractor code visible in this change emits rows for this
// table -- confirm where (or whether) it is populated.
folders(
unique int id: @folder,
string name: string ref,
string simple: string ref
);
@container = @folder | @file
// Relates a parent container to a child container (each a @folder or @file).
// Only `child` is declared unique.
containerparent(
int parent: @container ref,
unique int child: @container ref
);
sourceLocationPrefix(
string prefix: string ref
);