Merge pull request #19 from github/locations

Fix location handling to match common db schema requirements
This commit is contained in:
Nick Rolfe
2020-10-30 16:56:34 +00:00
committed by GitHub
7 changed files with 402 additions and 46 deletions

View File

@@ -17,7 +17,9 @@ pub fn create(language: Language, schema: Vec<Entry>) -> Extractor {
Extractor { parser, schema }
}
impl Extractor {
/// Extracts the source file at `path`, which is assumed to be canonicalized.
pub fn extract<'a>(&'a mut self, path: &Path) -> std::io::Result<Program> {
let span = span!(
Level::TRACE,
@@ -34,15 +36,37 @@ impl Extractor {
.parser
.parse(&source, None)
.expect("Failed to parse file");
let mut counter = -1;
// Create a label for the current file and increment the counter so that
// label doesn't get redefined.
counter += 1;
let file_label = Label::Normal(counter);
let mut visitor = Visitor {
source: &source,
trap_output: vec![TrapEntry::Comment(format!(
"Auto-generated TRAP file for {}",
path.display()
))],
counter: -1,
trap_output: vec![
TrapEntry::Comment(format!("Auto-generated TRAP file for {}", path.display())),
TrapEntry::MapLabelToKey(file_label, full_id_for_file(path)),
TrapEntry::GenericTuple(
"files".to_owned(),
vec![
Arg::Label(file_label),
Arg::String(normalize_path(path)),
Arg::String(match path.file_name() {
None => "".to_owned(),
Some(file_name) => format!("{}", file_name.to_string_lossy()),
}),
Arg::String(match path.extension() {
None => "".to_owned(),
Some(ext) => format!("{}", ext.to_string_lossy()),
}),
Arg::Int(1), // 1 = from source
],
),
],
counter,
// TODO: should we handle path strings that are not valid UTF8 better?
path: format!("{}", path.display()),
file_label,
stack: Vec::new(),
tables: build_schema_lookup(&self.schema),
union_types: build_union_type_lookup(&self.schema),
@@ -54,6 +78,49 @@ impl Extractor {
}
}
/// Normalizes `path` according to the common CodeQL specification.
///
/// `path` is assumed to have already been canonicalized with
/// `std::fs::canonicalize`. On Windows the path prefix is rewritten (drive
/// prefixes become `X:`, UNC prefixes become separate `server` and `share`
/// components), root/current/parent components are dropped, and the remaining
/// components are joined with `/`. On every other platform the path is
/// returned exactly as `Path::display` renders it.
fn normalize_path(path: &Path) -> String {
    use std::path::{Component, Prefix};

    if !cfg!(windows) {
        // For other operating systems, the canonicalized path is used
        // without modifications.
        return path.display().to_string();
    }

    // The way Rust canonicalizes paths on Windows doesn't match the CodeQL
    // spec, so certain prefixes are rewritten and backslashes are replaced
    // by joining the components with forward slashes.
    let mut parts: Vec<String> = Vec::new();
    for part in path.components() {
        match part {
            Component::Prefix(prefix) => match prefix.kind() {
                Prefix::Disk(drive) | Prefix::VerbatimDisk(drive) => {
                    parts.push(format!("{}:", drive as char))
                }
                Prefix::Verbatim(name) | Prefix::DeviceNS(name) => {
                    parts.push(name.to_string_lossy().into_owned())
                }
                Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
                    parts.push(server.to_string_lossy().into_owned());
                    parts.push(share.to_string_lossy().into_owned());
                }
            },
            Component::Normal(name) => parts.push(name.to_string_lossy().into_owned()),
            // Root, `.` and `..` components contribute nothing to the result.
            Component::RootDir | Component::CurDir | Component::ParentDir => {}
        }
    }
    parts.join("/")
}
/// Builds the TRAP key for the file at `path`: the normalized path followed
/// by the literal suffix `;sourcefile`.
fn full_id_for_file(path: &Path) -> String {
    let mut key = normalize_path(path);
    key.push_str(";sourcefile");
    key
}
fn build_schema_lookup<'a>(schema: &'a Vec<Entry>) -> Map<&'a TypeName, &'a Entry> {
let mut map = std::collections::BTreeMap::new();
for entry in schema {
@@ -77,6 +144,9 @@ fn build_union_type_lookup<'a>(schema: &'a Vec<Entry>) -> Map<&'a TypeName, &'a
struct Visitor<'a> {
/// The file path of the source code (as string)
path: String,
/// The label to use whenever we need to refer to the `@file` entity of this
/// source file.
file_label: Label,
/// The source code as a UTF-8 byte array
source: &'a Vec<u8>,
/// The accumulated trap entries
@@ -132,10 +202,11 @@ impl Visitor<'_> {
self.counter += 1;
let id = Label::Normal(self.counter);
let loc = Label::Location(self.counter);
self.trap_output.push(TrapEntry::New(id));
self.trap_output.push(TrapEntry::New(loc));
self.trap_output
.push(location_for(&self.source, &self.path, loc, node));
self.trap_output.push(TrapEntry::FreshId(id));
let (loc_label_def, loc_tuple) =
location_for(&self.source, &self.file_label, loc, node);
self.trap_output.push(loc_label_def);
self.trap_output.push(loc_tuple);
let table_name = node_type_name(node.kind(), node.is_named());
let args: Option<Vec<Arg>>;
if fields.is_empty() {
@@ -282,8 +353,15 @@ fn sliced_source_arg(source: &Vec<u8>, n: Node) -> Arg {
))
}
// Emit a 'Located' TrapEntry for the provided node, appropriately calibrated.
fn location_for<'a>(source: &Vec<u8>, fp: &String, label: Label, n: Node) -> TrapEntry {
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
// The first is the location and label definition, and the second is the
// 'Located' entry.
fn location_for<'a>(
source: &Vec<u8>,
file_label: &Label,
label: Label,
n: Node,
) -> (TrapEntry, TrapEntry) {
// Tree-sitter row, column values are 0-based while CodeQL starts
// counting at 1. In addition Tree-sitter's row and column for the
// end position are exclusive while CodeQL's end positions are inclusive.
@@ -325,14 +403,23 @@ fn location_for<'a>(source: &Vec<u8>, fp: &String, label: Label, n: Node) -> Tra
);
}
}
TrapEntry::Located(vec![
Arg::Label(label),
Arg::String(fp.to_owned()),
Arg::Int(start_line),
Arg::Int(start_col),
Arg::Int(end_line),
Arg::Int(end_col),
])
(
TrapEntry::MapLabelToKey(
label,
format!(
"loc,{{{}}},{},{},{},{}",
file_label, start_line, start_col, end_line, end_col
),
),
TrapEntry::Located(vec![
Arg::Label(label),
Arg::Label(file_label.clone()),
Arg::Int(start_line),
Arg::Int(start_col),
Arg::Int(end_line),
Arg::Int(end_col),
]),
)
}
fn traverse(tree: &Tree, visitor: &mut Visitor) {
@@ -369,20 +456,25 @@ impl fmt::Display for Program {
}
enum TrapEntry {
// @id = *@
New(Label),
/// Maps the label to a fresh id, e.g. `#123 = *`.
FreshId(Label),
/// Maps the label to a key, e.g. `#7 = @"foo"`.
MapLabelToKey(Label, String),
// @node_def(self, arg?, location)@
Definition(String, Label, Vec<Arg>, Label),
// @node_child(self, index, parent)@
ChildOf(String, Label, String, Option<Index>, Label),
// @location(loc, path, r1, c1, r2, c2)
Located(Vec<Arg>),
/// foo_bar(arg*)
GenericTuple(String, Vec<Arg>),
Comment(String),
}
impl fmt::Display for TrapEntry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TrapEntry::New(id) => write!(f, "{} = *", id),
TrapEntry::FreshId(label) => write!(f, "{} = *", label),
TrapEntry::MapLabelToKey(label, key) => write!(f, "{} = @\"{}\"", label, key),
TrapEntry::Definition(n, id, args, loc) => {
let mut args_str = String::new();
for arg in args {
@@ -416,7 +508,7 @@ impl fmt::Display for TrapEntry {
},
TrapEntry::Located(args) => write!(
f,
"location({}, {}, {}, {}, {}, {})",
"locations_default({}, {}, {}, {}, {}, {})",
args.get(0).unwrap(),
args.get(1).unwrap(),
args.get(2).unwrap(),
@@ -424,6 +516,16 @@ impl fmt::Display for TrapEntry {
args.get(4).unwrap(),
args.get(5).unwrap(),
),
TrapEntry::GenericTuple(name, args) => {
write!(f, "{}(", name)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
TrapEntry::Comment(line) => write!(f, "// {}", line),
}
}

View File

@@ -40,7 +40,7 @@ fn main() -> std::io::Result<()> {
let schema = node_types::read_node_types_str(tree_sitter_ruby::NODE_TYPES)?;
let mut extractor = extractor::create(language, schema);
for line in std::io::BufReader::new(file_list).lines() {
let path = PathBuf::from(line?);
let path = PathBuf::from(line?).canonicalize()?;
let trap_file = path_for(&trap_dir, &path, ".trap");
let src_archive_file = path_for(&src_archive_dir, &path, "");
let trap = extractor.extract(&path)?;

View File

@@ -113,7 +113,14 @@ fn add_field(
/// Converts the given tree-sitter node types into CodeQL dbscheme entries.
fn convert_nodes(nodes: &Vec<node_types::Entry>) -> Vec<dbscheme::Entry> {
let mut entries: Vec<dbscheme::Entry> = vec![
create_location_table(),
create_location_union(),
create_locations_default_table(),
create_sourceline_union(),
create_numlines_table(),
create_files_table(),
create_folders_table(),
create_container_union(),
create_containerparent_table(),
create_source_location_prefix_table(),
];
let mut top_members: Vec<String> = Vec::new();
@@ -212,23 +219,103 @@ fn write_dbscheme(language: &Language, entries: &[dbscheme::Entry]) -> std::io::
dbscheme::write(&language.name, &mut file, &entries)
}
fn create_location_table() -> dbscheme::Entry {
/// Builds the dbscheme union `location`, whose single member is
/// `location_default`.
fn create_location_union() -> dbscheme::Entry {
    let members = vec![String::from("location_default")];
    let union = dbscheme::Union {
        name: String::from("location"),
        members,
    };
    dbscheme::Entry::Union(union)
}
fn create_files_table() -> dbscheme::Entry {
dbscheme::Entry::Table(dbscheme::Table {
name: "location".to_string(),
name: "files".to_owned(),
keysets: None,
columns: vec![
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "id".to_owned(),
ql_type: ql::Type::AtType("file".to_owned()),
ql_type_is_ref: false,
},
dbscheme::Column {
db_type: dbscheme::DbColumnType::String,
name: "name".to_owned(),
unique: false,
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
dbscheme::Column {
db_type: dbscheme::DbColumnType::String,
name: "simple".to_owned(),
unique: false,
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
dbscheme::Column {
db_type: dbscheme::DbColumnType::String,
name: "ext".to_owned(),
unique: false,
ql_type: ql::Type::String,
ql_type_is_ref: true,
},
dbscheme::Column {
db_type: dbscheme::DbColumnType::Int,
name: "fromSource".to_owned(),
unique: false,
ql_type: ql::Type::Int,
ql_type_is_ref: true,
},
],
})
}
/// Builds the dbscheme table `folders` with a unique `@folder` id column
/// followed by two string columns, `name` and `simple`.
fn create_folders_table() -> dbscheme::Entry {
    // Both string columns share the same shape and differ only in name.
    let string_column = |column_name: &str| dbscheme::Column {
        name: String::from(column_name),
        db_type: dbscheme::DbColumnType::String,
        ql_type: ql::Type::String,
        ql_type_is_ref: true,
        unique: false,
    };
    // The id column defines the `@folder` type rather than referencing it.
    let id_column = dbscheme::Column {
        name: String::from("id"),
        db_type: dbscheme::DbColumnType::Int,
        ql_type: ql::Type::AtType(String::from("folder")),
        ql_type_is_ref: false,
        unique: true,
    };
    let table = dbscheme::Table {
        name: String::from("folders"),
        keysets: None,
        columns: vec![id_column, string_column("name"), string_column("simple")],
    };
    dbscheme::Entry::Table(table)
}
fn create_locations_default_table() -> dbscheme::Entry {
dbscheme::Entry::Table(dbscheme::Table {
name: "locations_default".to_string(),
keysets: None,
columns: vec![
dbscheme::Column {
unique: true,
db_type: dbscheme::DbColumnType::Int,
name: "id".to_string(),
ql_type: ql::Type::AtType("location".to_string()),
ql_type: ql::Type::AtType("location_default".to_string()),
ql_type_is_ref: false,
},
dbscheme::Column {
unique: false,
db_type: dbscheme::DbColumnType::String,
name: "file_path".to_string(),
ql_type: ql::Type::String,
db_type: dbscheme::DbColumnType::Int,
name: "file".to_string(),
ql_type: ql::Type::AtType("file".to_owned()),
ql_type_is_ref: true,
},
dbscheme::Column {
@@ -263,6 +350,80 @@ fn create_location_table() -> dbscheme::Entry {
})
}
/// Builds the dbscheme union `sourceline`, whose single member is `file`.
fn create_sourceline_union() -> dbscheme::Entry {
    let members = vec![String::from("file")];
    let union = dbscheme::Union {
        name: String::from("sourceline"),
        members,
    };
    dbscheme::Entry::Union(union)
}
/// Builds the dbscheme table `numlines`: a `@sourceline` reference column
/// (`element_id`) followed by three plain integer columns (`num_lines`,
/// `num_code`, `num_comment`). None of the columns is unique.
fn create_numlines_table() -> dbscheme::Entry {
    // The three counter columns are identical apart from their names.
    let int_column = |column_name: &str| dbscheme::Column {
        name: String::from(column_name),
        db_type: dbscheme::DbColumnType::Int,
        ql_type: ql::Type::Int,
        ql_type_is_ref: true,
        unique: false,
    };
    let element_column = dbscheme::Column {
        name: String::from("element_id"),
        db_type: dbscheme::DbColumnType::Int,
        ql_type: ql::Type::AtType(String::from("sourceline")),
        ql_type_is_ref: true,
        unique: false,
    };
    let table = dbscheme::Table {
        name: String::from("numlines"),
        keysets: None,
        columns: vec![
            element_column,
            int_column("num_lines"),
            int_column("num_code"),
            int_column("num_comment"),
        ],
    };
    dbscheme::Entry::Table(table)
}
/// Builds the dbscheme union `container`, whose members are `folder` and
/// `file` (in that order).
fn create_container_union() -> dbscheme::Entry {
    let members = vec![String::from("folder"), String::from("file")];
    let union = dbscheme::Union {
        name: String::from("container"),
        members,
    };
    dbscheme::Entry::Union(union)
}
/// Builds the dbscheme table `containerparent` with two `@container`
/// reference columns: a non-unique `parent` and a unique `child`.
fn create_containerparent_table() -> dbscheme::Entry {
    // Both columns reference `@container`; only the name and uniqueness vary.
    let container_column = |column_name: &str, is_unique: bool| dbscheme::Column {
        name: String::from(column_name),
        db_type: dbscheme::DbColumnType::Int,
        ql_type: ql::Type::AtType(String::from("container")),
        ql_type_is_ref: true,
        unique: is_unique,
    };
    let table = dbscheme::Table {
        name: String::from("containerparent"),
        keysets: None,
        columns: vec![
            container_column("parent", false),
            container_column("child", true),
        ],
    };
    dbscheme::Entry::Table(table)
}
fn create_source_location_prefix_table() -> dbscheme::Entry {
dbscheme::Entry::Table(dbscheme::Table {
name: "sourceLocationPrefix".to_string(),

View File

@@ -79,7 +79,10 @@ pub enum Expression {
String(String),
Pred(String, Vec<Expression>),
Or(Vec<Expression>),
And(Vec<Expression>),
Equals(Box<Expression>, Box<Expression>),
Exists(Vec<FormalParameter>, Box<Expression>),
Dot(Box<Expression>, String, Vec<Expression>),
}
impl fmt::Display for Expression {
@@ -105,12 +108,45 @@ impl fmt::Display for Expression {
if index > 0 {
write!(f, " or ")?;
}
write!(f, "{}", disjunct)?;
write!(f, "({})", disjunct)?;
}
Ok(())
}
}
Expression::And(conjuncts) => {
if conjuncts.is_empty() {
write!(f, "any()")
} else {
for (index, conjunct) in conjuncts.iter().enumerate() {
if index > 0 {
write!(f, " and ")?;
}
write!(f, "{}", conjunct)?;
}
Ok(())
}
}
Expression::Equals(a, b) => write!(f, "{} = {}", a, b),
Expression::Exists(params, formula) => {
write!(f, "exists(")?;
for (index, param) in params.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", param)?;
}
write!(f, " | {})", formula)
}
Expression::Dot(x, member_pred, args) => {
write!(f, "{}.{}(", x, member_pred)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ", ")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
}
}
}

View File

@@ -168,16 +168,32 @@ fn create_location_class() -> ql::Class {
param_type: ql::Type::Int,
},
],
body: ql::Expression::Pred(
"location".to_owned(),
vec![
ql::Expression::Var("this".to_owned()),
ql::Expression::Var("filePath".to_owned()),
ql::Expression::Var("startLine".to_owned()),
ql::Expression::Var("startColumn".to_owned()),
ql::Expression::Var("endLine".to_owned()),
ql::Expression::Var("endColumn".to_owned()),
],
body: ql::Expression::Exists(
vec![ql::FormalParameter {
param_type: ql::Type::Normal("File".to_owned()),
name: "f".to_owned(),
}],
Box::new(ql::Expression::And(vec![
ql::Expression::Pred(
"locations_default".to_owned(),
vec![
ql::Expression::Var("this".to_owned()),
ql::Expression::Var("f".to_owned()),
ql::Expression::Var("startLine".to_owned()),
ql::Expression::Var("startColumn".to_owned()),
ql::Expression::Var("endLine".to_owned()),
ql::Expression::Var("endColumn".to_owned()),
],
),
ql::Expression::Equals(
Box::new(ql::Expression::Var("filePath".to_owned())),
Box::new(ql::Expression::Dot(
Box::new(ql::Expression::Var("f".to_owned())),
"getAbsolutePath".to_owned(),
vec![],
)),
),
])),
),
};
ql::Class {

View File

@@ -3,13 +3,22 @@
* Automatically generated from the tree-sitter grammar; do not edit
*/
/** A `@file` entity, described by a row of the `files` relation. */
class File extends @file {
/** Gets the value of the second column (`name`) of this file's `files` row. */
string getAbsolutePath() { files(this, result, _, _, _) }
/** Gets a textual representation of this file: the same string as `getAbsolutePath()`. */
string toString() { result = this.getAbsolutePath() }
}
class Location extends @location {
string toString() { result = "Location" }
predicate hasLocationInfo(
string filePath, int startLine, int startColumn, int endLine, int endColumn
) {
location(this, filePath, startLine, startColumn, endLine, endColumn)
exists(File f |
locations_default(this, f, startLine, startColumn, endLine, endColumn) and
filePath = f.getAbsolutePath()
)
}
}

View File

@@ -1,15 +1,47 @@
// CodeQL database schema for Ruby
// Automatically generated from the tree-sitter grammar; do not edit
location(
unique int id: @location,
string file_path: string ref,
@location = @location_default
locations_default(
unique int id: @location_default,
int file: @file ref,
int start_line: int ref,
int start_column: int ref,
int end_line: int ref,
int end_column: int ref
);
// Union of entity types that `numlines` rows may refer to; currently only @file.
@sourceline = @file
// Three integer counts per @sourceline element. Presumably total, code and
// comment line counts, judging by the column names (TODO confirm).
numlines(
int element_id: @sourceline ref,
int num_lines: int ref,
int num_code: int ref,
int num_comment: int ref
);
// One row per @file entity. The extractor writes the normalized path as
// `name`, the final path component as `simple`, the extension as `ext`, and
// 1 ("from source") as `fromSource`.
files(
unique int id: @file,
string name: string ref,
string simple: string ref,
string ext: string ref,
int fromSource: int ref
);
// One row per @folder entity, with the same `name`/`simple` string columns as `files`.
folders(
unique int id: @folder,
string name: string ref,
string simple: string ref
);
// A container is either a folder or a file.
@container = @folder | @file
// Relates a container to its parent container. `child` is unique, so each
// container appears as a child in at most one row.
containerparent(
int parent: @container ref,
unique int child: @container ref
);
// A single string column holding a prefix; presumably the source-location
// path prefix (TODO confirm against the code that populates it).
sourceLocationPrefix(
string prefix: string ref
);