Merge branch 'main' into encoding

This commit is contained in:
Arthur Baars
2022-07-21 21:21:17 +02:00
committed by GitHub
20 changed files with 694 additions and 486 deletions

View File

@@ -77,7 +77,7 @@ class ExecState extends DataFlow::FlowState {
   ExecState() {
     this =
       "ExecState (" + fst.getLocation() + " | " + fst + ", " + snd.getLocation() + " | " + snd + ")" and
-      interestingConcatenation(fst, snd)
+      interestingConcatenation(pragma[only_bind_into](fst), pragma[only_bind_into](snd))
   }

   DataFlow::Node getFstNode() { result = fst }

View File

@@ -2,7 +2,54 @@ function RegisterExtractorPack(id)
     local extractor = GetPlatformToolsDirectory() ..
                           'Semmle.Extraction.CSharp.Driver'
     if OperatingSystem == 'windows' then extractor = extractor .. '.exe' end
+
+    function DotnetMatcherBuild(compilerName, compilerPath, compilerArguments,
+                                _languageId)
+        if compilerName ~= 'dotnet' and compilerName ~= 'dotnet.exe' then
+            return nil
+        end
+        -- The dotnet CLI has the following usage instructions:
+        -- dotnet [sdk-options] [command] [command-options] [arguments]
+        -- we are interested in dotnet build, which has the following usage instructions:
+        -- dotnet [options] build [<PROJECT | SOLUTION>...]
+        -- For now, parse the command line as follows:
+        -- Everything that starts with `-` (or `/`) will be ignored.
+        -- The first non-option argument is treated as the command.
+        -- if that's `build`, we append `/p:UseSharedCompilation=false` to the command line,
+        -- otherwise we do nothing.
+        local match = false
+        local argv = compilerArguments.argv
+        if OperatingSystem == 'windows' then
+            -- let's hope that this split matches the escaping rules `dotnet` applies to command line arguments
+            -- or, at least, that it is close enough
+            argv =
+                NativeArgumentsToArgv(compilerArguments.nativeArgumentPointer)
+        end
+        for i, arg in ipairs(argv) do
+            -- dotnet options start with either - or / (both are legal)
+            local firstCharacter = string.sub(arg, 1, 1)
+            if not (firstCharacter == '-') and not (firstCharacter == '/') then
+                Log(1, 'Dotnet subcommand detected: %s', arg)
+                if arg == 'build' then match = true end
+                break
+            end
+        end
+        if match then
+            return {
+                order = ORDER_REPLACE,
+                invocation = BuildExtractorInvocation(id, compilerPath,
+                                                      compilerPath,
+                                                      compilerArguments, nil, {
+                    '/p:UseSharedCompilation=false'
+                })
+            }
+        end
+        return nil
+    end
+
     local windowsMatchers = {
+        DotnetMatcherBuild,
         CreatePatternMatcher({'^dotnet%.exe$'}, MatchCompilerName, extractor, {
             prepend = {'--dotnetexec', '--cil'},
             order = ORDER_BEFORE
@@ -10,22 +57,21 @@ function RegisterExtractorPack(id)
         CreatePatternMatcher({'^csc.*%.exe$'}, MatchCompilerName, extractor, {
             prepend = {'--compiler', '"${compiler}"', '--cil'},
             order = ORDER_BEFORE
         }),
         CreatePatternMatcher({'^fakes.*%.exe$', 'moles.*%.exe'},
                              MatchCompilerName, nil, {trace = false})
     }
     local posixMatchers = {
-        CreatePatternMatcher({'^mcs%.exe$', '^csc%.exe$'}, MatchCompilerName,
-                             extractor, {
-            prepend = {'--compiler', '"${compiler}"', '--cil'},
-            order = ORDER_BEFORE
-        }),
+        DotnetMatcherBuild,
         CreatePatternMatcher({'^mono', '^dotnet$'}, MatchCompilerName,
                              extractor, {
             prepend = {'--dotnetexec', '--cil'},
             order = ORDER_BEFORE
+        }),
+        CreatePatternMatcher({'^mcs%.exe$', '^csc%.exe$'}, MatchCompilerName,
+                             extractor, {
+            prepend = {'--compiler', '"${compiler}"', '--cil'},
+            order = ORDER_BEFORE
         }), function(compilerName, compilerPath, compilerArguments, _languageId)
         if MatchCompilerName('^msbuild$', compilerName, compilerPath,
                              compilerArguments) or
@@ -49,7 +95,6 @@ function RegisterExtractorPack(id)
     else
         return posixMatchers
     end
 end
 -- Return a list of minimum supported versions of the configuration file format
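To make the argument-scanning rule described in the comments above concrete, here is a small illustrative sketch in Rust (not part of this PR, and independent of the tracer's Lua API): tokens starting with `-` or `/` are treated as options and skipped, the first remaining token is taken as the `dotnet` subcommand, and `/p:UseSharedCompilation=false` is appended only when that subcommand is `build`.

```rust
// Illustrative only (not part of this PR): mirrors the matcher's rule for
// `dotnet` command lines.
fn rewrite_dotnet_args(argv: &[String]) -> Option<Vec<String>> {
    // Skip options (tokens starting with `-` or `/`); the first remaining
    // token is the dotnet subcommand.
    let subcommand = argv
        .iter()
        .find(|arg| !arg.starts_with('-') && !arg.starts_with('/'))?;
    if subcommand.as_str() == "build" {
        // Only `dotnet build` gets the extra MSBuild property appended.
        let mut rewritten = argv.to_vec();
        rewritten.push("/p:UseSharedCompilation=false".to_string());
        Some(rewritten)
    } else {
        None // leave e.g. `dotnet run` or `dotnet test` untouched
    }
}

fn main() {
    let build: Vec<String> = ["-v", "build", "My.sln"].iter().map(|s| s.to_string()).collect();
    assert_eq!(
        rewrite_dotnet_args(&build).unwrap().last().unwrap(),
        "/p:UseSharedCompilation=false"
    );
    let run: Vec<String> = ["run"].iter().map(|s| s.to_string()).collect();
    assert!(rewrite_dotnet_args(&run).is_none());
}
```

The real matcher above performs the same scan in Lua, and on Windows it additionally re-splits the native command line via `NativeArgumentsToArgv` before inspecting the arguments.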

View File

@@ -11,14 +11,17 @@ CodeQL.
 Languages and compilers
 #######################

-CodeQL supports the following languages and compilers.
+The current versions of the CodeQL CLI (`changelog <https://github.com/github/codeql-cli-binaries/blob/main/CHANGELOG.md>`__, `releases <https://github.com/github/codeql-cli-binaries/releases>`__),
+CodeQL library packs (`source <https://github.com/github/codeql/tree/codeql-cli/latest>`__),
+and CodeQL bundle (`releases <https://github.com/github/codeql-action/releases>`__)
+support the following languages and compilers.

 .. include:: ../support/reusables/versions-compilers.rst

 Frameworks and libraries
 ########################

-The libraries and queries in the current version of CodeQL have been explicitly checked against the libraries and frameworks listed below.
+The current versions of the CodeQL library and query packs (`source <https://github.com/github/codeql/tree/codeql-cli/latest>`__) have been explicitly checked against the libraries and frameworks listed below.

 .. pull-quote::

View File

@@ -1,6 +1,10 @@
 C and C++ built-in support
 ================================

+Provided by the current versions of the
+CodeQL query pack ``codeql/cpp-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/cpp/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/cpp/ql/src>`__)
+and the CodeQL library pack ``codeql/cpp-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/cpp/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/cpp/ql/lib>`__).
+
 .. csv-table::
    :header-rows: 1
    :class: fullWidthTable
@@ -14,6 +18,10 @@ C and C++ built-in support
 C# built-in support
 ================================

+Provided by the current versions of the
+CodeQL query pack ``codeql/csharp-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/csharp/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/csharp/ql/src>`__)
+and the CodeQL library pack ``codeql/csharp-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/csharp/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/csharp/ql/lib>`__).
+
 .. csv-table::
    :header-rows: 1
    :class: fullWidthTable
@@ -33,6 +41,10 @@ C# built-in support
 Go built-in support
 ================================

+Provided by the current versions of the
+CodeQL query pack ``codeql/go-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/go/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/go/ql/src>`__)
+and the CodeQL library pack ``codeql/go-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/go/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/go/ql/lib>`__).
+
 .. csv-table::
    :header-rows: 1
    :class: fullWidthTable
@@ -84,6 +96,10 @@ Go built-in support
 Java built-in support
 ==================================

+Provided by the current versions of the
+CodeQL query pack ``codeql/java-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/java/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/java/ql/src>`__)
+and the CodeQL library pack ``codeql/java-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/java/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/java/ql/lib>`__).
+
 .. csv-table::
    :header-rows: 1
    :class: fullWidthTable
@@ -113,6 +129,10 @@ Java built-in support
 JavaScript and TypeScript built-in support
 =======================================================

+Provided by the current versions of the
+CodeQL query pack ``codeql/javascript-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/javascript/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/javascript/ql/src>`__)
+and the CodeQL library pack ``codeql/javascript-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/javascript/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/javascript/ql/lib>`__).
+
 .. csv-table::
    :header-rows: 1
    :class: fullWidthTable
@@ -156,6 +176,10 @@ JavaScript and TypeScript built-in support
 Python built-in support
 ====================================

+Provided by the current versions of the
+CodeQL query pack ``codeql/python-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/python/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/python/ql/src>`__)
+and the CodeQL library pack ``codeql/python-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/python/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/python/ql/lib>`__).
+
 .. csv-table::
    :header-rows: 1
    :class: fullWidthTable

ruby/Cargo.lock (generated; file contents not shown)

View File

@@ -11,7 +11,7 @@ flate2 = "1.0"
 node-types = { path = "../node-types" }
 tree-sitter = "0.19"
 tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "1a538da253d73f896b9f6c0c7d79cda58791ac5c" }
-tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "5b305c3cd32db10494cedd2743de6bbe32f1a573" }
+tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "e75d04404c9dd71ad68850d5c672b226d5e694f3" }
 clap = "3.0"
 tracing = "0.1"
 tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }

View File

@@ -1,114 +1,74 @@
use crate::trap;
use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName}; use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName};
use std::borrow::Cow;
use std::collections::BTreeMap as Map; use std::collections::BTreeMap as Map;
use std::collections::BTreeSet as Set; use std::collections::BTreeSet as Set;
use std::fmt; use std::fmt;
use std::io::Write;
use std::path::Path; use std::path::Path;
use tracing::{error, info, span, Level}; use tracing::{error, info, span, Level};
use tree_sitter::{Language, Node, Parser, Range, Tree}; use tree_sitter::{Language, Node, Parser, Range, Tree};
pub struct TrapWriter { pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
/// The accumulated trap entries let (file_label, fresh) =
trap_output: Vec<TrapEntry>, writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path)));
/// A counter for generating fresh labels
counter: u32,
/// cache of global keys
global_keys: std::collections::HashMap<String, Label>,
}
pub fn new_trap_writer() -> TrapWriter {
TrapWriter {
counter: 0,
trap_output: Vec::new(),
global_keys: std::collections::HashMap::new(),
}
}
impl TrapWriter {
/// Gets a label that will hold the unique ID of the passed string at import time.
/// This can be used for incrementally importable TRAP files -- use globally unique
/// strings to compute a unique ID for table tuples.
///
/// Note: You probably want to make sure that the key strings that you use are disjoint
/// for disjoint column types; the standard way of doing this is to prefix (or append)
/// the column type name to the ID. Thus, you might identify methods in Java by the
/// full ID "methods_com.method.package.DeclaringClass.method(argumentList)".
fn fresh_id(&mut self) -> Label {
let label = Label(self.counter);
self.counter += 1;
self.trap_output.push(TrapEntry::FreshId(label));
label
}
fn global_id(&mut self, key: &str) -> (Label, bool) {
if let Some(label) = self.global_keys.get(key) {
return (*label, false);
}
let label = Label(self.counter);
self.counter += 1;
self.global_keys.insert(key.to_owned(), label);
self.trap_output
.push(TrapEntry::MapLabelToKey(label, key.to_owned()));
(label, true)
}
fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
self.trap_output
.push(TrapEntry::GenericTuple(table_name.to_owned(), args))
}
fn populate_file(&mut self, absolute_path: &Path) -> Label {
let (file_label, fresh) = self.global_id(&full_id_for_file(absolute_path));
if fresh { if fresh {
self.add_tuple( writer.add_tuple(
"files", "files",
vec![ vec![
Arg::Label(file_label), trap::Arg::Label(file_label),
Arg::String(normalize_path(absolute_path)), trap::Arg::String(normalize_path(absolute_path)),
], ],
); );
self.populate_parent_folders(file_label, absolute_path.parent()); populate_parent_folders(writer, file_label, absolute_path.parent());
} }
file_label file_label
} }
fn populate_empty_file(&mut self) -> Label { fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label {
let (file_label, fresh) = self.global_id("empty;sourcefile"); let (file_label, fresh) = writer.global_id("empty;sourcefile");
if fresh { if fresh {
self.add_tuple( writer.add_tuple(
"files", "files",
vec![Arg::Label(file_label), Arg::String("".to_string())], vec![
trap::Arg::Label(file_label),
trap::Arg::String("".to_string()),
],
); );
} }
file_label file_label
} }
pub fn populate_empty_location(&mut self) { pub fn populate_empty_location(writer: &mut trap::Writer) {
let file_label = self.populate_empty_file(); let file_label = populate_empty_file(writer);
self.location(file_label, 0, 0, 0, 0); location(writer, file_label, 0, 0, 0, 0);
} }
fn populate_parent_folders(&mut self, child_label: Label, path: Option<&Path>) { pub fn populate_parent_folders(
writer: &mut trap::Writer,
child_label: trap::Label,
path: Option<&Path>,
) {
let mut path = path; let mut path = path;
let mut child_label = child_label; let mut child_label = child_label;
loop { loop {
match path { match path {
None => break, None => break,
Some(folder) => { Some(folder) => {
let (folder_label, fresh) = self.global_id(&full_id_for_folder(folder)); let (folder_label, fresh) =
self.add_tuple( writer.global_id(&trap::full_id_for_folder(&normalize_path(folder)));
writer.add_tuple(
"containerparent", "containerparent",
vec![Arg::Label(folder_label), Arg::Label(child_label)], vec![
trap::Arg::Label(folder_label),
trap::Arg::Label(child_label),
],
); );
if fresh { if fresh {
self.add_tuple( writer.add_tuple(
"folders", "folders",
vec![ vec![
Arg::Label(folder_label), trap::Arg::Label(folder_label),
Arg::String(normalize_path(folder)), trap::Arg::String(normalize_path(folder)),
], ],
); );
path = folder.parent(); path = folder.parent();
@@ -122,112 +82,75 @@ impl TrapWriter {
} }
fn location( fn location(
&mut self, writer: &mut trap::Writer,
file_label: Label, file_label: trap::Label,
start_line: usize, start_line: usize,
start_column: usize, start_column: usize,
end_line: usize, end_line: usize,
end_column: usize, end_column: usize,
) -> Label { ) -> trap::Label {
let (loc_label, fresh) = self.global_id(&format!( let (loc_label, fresh) = writer.global_id(&format!(
"loc,{{{}}},{},{},{},{}", "loc,{{{}}},{},{},{},{}",
file_label, start_line, start_column, end_line, end_column file_label, start_line, start_column, end_line, end_column
)); ));
if fresh { if fresh {
self.add_tuple( writer.add_tuple(
"locations_default", "locations_default",
vec![ vec![
Arg::Label(loc_label), trap::Arg::Label(loc_label),
Arg::Label(file_label), trap::Arg::Label(file_label),
Arg::Int(start_line), trap::Arg::Int(start_line),
Arg::Int(start_column), trap::Arg::Int(start_column),
Arg::Int(end_line), trap::Arg::Int(end_line),
Arg::Int(end_column), trap::Arg::Int(end_column),
], ],
); );
} }
loc_label loc_label
} }
fn comment(&mut self, text: String) {
self.trap_output.push(TrapEntry::Comment(text));
}
pub fn output(self, writer: &mut dyn Write) -> std::io::Result<()> {
write!(writer, "{}", Program(self.trap_output))
}
}
/// Extracts the source file at `path`, which is assumed to be canonicalized. /// Extracts the source file at `path`, which is assumed to be canonicalized.
pub fn extract( pub fn extract(
language: Language, language: Language,
language_prefix: &str, language_prefix: &str,
schema: &NodeTypeMap, schema: &NodeTypeMap,
trap_writer: &mut TrapWriter, trap_writer: &mut trap::Writer,
path: &Path, path: &Path,
source: &[u8], source: &[u8],
ranges: &[Range], ranges: &[Range],
) -> std::io::Result<()> { ) -> std::io::Result<()> {
let path_str = format!("{}", path.display());
let span = span!( let span = span!(
Level::TRACE, Level::TRACE,
"extract", "extract",
file = %path.display() file = %path_str
); );
let _enter = span.enter(); let _enter = span.enter();
info!("extracting: {}", path.display()); info!("extracting: {}", path_str);
let mut parser = Parser::new(); let mut parser = Parser::new();
parser.set_language(language).unwrap(); parser.set_language(language).unwrap();
parser.set_included_ranges(ranges).unwrap(); parser.set_included_ranges(ranges).unwrap();
let tree = parser.parse(&source, None).expect("Failed to parse file"); let tree = parser.parse(&source, None).expect("Failed to parse file");
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display())); trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str));
let file_label = &trap_writer.populate_file(path); let file_label = populate_file(trap_writer, path);
let mut visitor = Visitor { let mut visitor = Visitor::new(
source, source,
trap_writer, trap_writer,
// TODO: should we handle path strings that are not valid UTF8 better? // TODO: should we handle path strings that are not valid UTF8 better?
path: format!("{}", path.display()), &path_str,
file_label: *file_label, file_label,
toplevel_child_counter: 0,
stack: Vec::new(),
language_prefix, language_prefix,
schema, schema,
}; );
traverse(&tree, &mut visitor); traverse(&tree, &mut visitor);
parser.reset(); parser.reset();
Ok(()) Ok(())
} }
/// Escapes a string for use in a TRAP key, by replacing special characters with
/// HTML entities.
fn escape_key<'a, S: Into<Cow<'a, str>>>(key: S) -> Cow<'a, str> {
fn needs_escaping(c: char) -> bool {
matches!(c, '&' | '{' | '}' | '"' | '@' | '#')
}
let key = key.into();
if key.contains(needs_escaping) {
let mut escaped = String::with_capacity(2 * key.len());
for c in key.chars() {
match c {
'&' => escaped.push_str("&amp;"),
'{' => escaped.push_str("&lbrace;"),
'}' => escaped.push_str("&rbrace;"),
'"' => escaped.push_str("&quot;"),
'@' => escaped.push_str("&commat;"),
'#' => escaped.push_str("&num;"),
_ => escaped.push(c),
}
}
Cow::Owned(escaped)
} else {
key
}
}
/// Normalizes the path according the common CodeQL specification. Assumes that /// Normalizes the path according the common CodeQL specification. Assumes that
/// `path` has already been canonicalized using `std::fs::canonicalize`. /// `path` has already been canonicalized using `std::fs::canonicalize`.
fn normalize_path(path: &Path) -> String { fn normalize_path(path: &Path) -> String {
@@ -267,34 +190,28 @@ fn normalize_path(path: &Path) -> String {
} }
} }
fn full_id_for_file(path: &Path) -> String {
format!("{};sourcefile", escape_key(&normalize_path(path)))
}
fn full_id_for_folder(path: &Path) -> String {
format!("{};folder", escape_key(&normalize_path(path)))
}
struct ChildNode { struct ChildNode {
field_name: Option<&'static str>, field_name: Option<&'static str>,
label: Label, label: trap::Label,
type_name: TypeName, type_name: TypeName,
} }
struct Visitor<'a> { struct Visitor<'a> {
/// The file path of the source code (as string) /// The file path of the source code (as string)
path: String, path: &'a str,
/// The label to use whenever we need to refer to the `@file` entity of this /// The label to use whenever we need to refer to the `@file` entity of this
/// source file. /// source file.
file_label: Label, file_label: trap::Label,
/// The source code as a UTF-8 byte array /// The source code as a UTF-8 byte array
source: &'a [u8], source: &'a [u8],
/// A TrapWriter to accumulate trap entries /// A trap::Writer to accumulate trap entries
trap_writer: &'a mut TrapWriter, trap_writer: &'a mut trap::Writer,
/// A counter for top-level child nodes /// A counter for top-level child nodes
toplevel_child_counter: usize, toplevel_child_counter: usize,
/// Language prefix /// Language-specific name of the AST info table
language_prefix: &'a str, ast_node_info_table_name: String,
/// Language-specific name of the tokeninfo table
tokeninfo_table_name: String,
/// A lookup table from type name to node types /// A lookup table from type name to node types
schema: &'a NodeTypeMap, schema: &'a NodeTypeMap,
/// A stack for gathering information from child nodes. Whenever a node is /// A stack for gathering information from child nodes. Whenever a node is
@@ -303,27 +220,48 @@ struct Visitor<'a> {
/// node the list containing the child data is popped from the stack and /// node the list containing the child data is popped from the stack and
/// matched against the dbscheme for the node. If the expectations are met /// matched against the dbscheme for the node. If the expectations are met
/// the corresponding row definitions are added to the trap_output. /// the corresponding row definitions are added to the trap_output.
stack: Vec<(Label, usize, Vec<ChildNode>)>, stack: Vec<(trap::Label, usize, Vec<ChildNode>)>,
}
impl<'a> Visitor<'a> {
fn new(
source: &'a [u8],
trap_writer: &'a mut trap::Writer,
path: &'a str,
file_label: trap::Label,
language_prefix: &str,
schema: &'a NodeTypeMap,
) -> Visitor<'a> {
Visitor {
path,
file_label,
source,
trap_writer,
toplevel_child_counter: 0,
ast_node_info_table_name: format!("{}_ast_node_info", language_prefix),
tokeninfo_table_name: format!("{}_tokeninfo", language_prefix),
schema,
stack: Vec::new(),
}
} }
impl Visitor<'_> {
fn record_parse_error( fn record_parse_error(
&mut self, &mut self,
error_message: String, error_message: String,
full_error_message: String, full_error_message: String,
loc: Label, loc: trap::Label,
) { ) {
error!("{}", full_error_message); error!("{}", full_error_message);
let id = self.trap_writer.fresh_id(); let id = self.trap_writer.fresh_id();
self.trap_writer.add_tuple( self.trap_writer.add_tuple(
"diagnostics", "diagnostics",
vec![ vec![
Arg::Label(id), trap::Arg::Label(id),
Arg::Int(40), // severity 40 = error trap::Arg::Int(40), // severity 40 = error
Arg::String("parse_error".to_string()), trap::Arg::String("parse_error".to_string()),
Arg::String(error_message), trap::Arg::String(error_message),
Arg::String(full_error_message), trap::Arg::String(full_error_message),
Arg::Label(loc), trap::Arg::Label(loc),
], ],
); );
} }
@@ -335,7 +273,8 @@ impl Visitor<'_> {
node: Node, node: Node,
) { ) {
let (start_line, start_column, end_line, end_column) = location_for(self.source, node); let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
let loc = self.trap_writer.location( let loc = location(
self.trap_writer,
self.file_label, self.file_label,
start_line, start_line,
start_column, start_column,
@@ -374,7 +313,8 @@ impl Visitor<'_> {
} }
let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack"); let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack");
let (start_line, start_column, end_line, end_column) = location_for(self.source, node); let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
let loc = self.trap_writer.location( let loc = location(
self.trap_writer,
self.file_label, self.file_label,
start_line, start_line,
start_column, start_column,
@@ -402,19 +342,19 @@ impl Visitor<'_> {
match &table.kind { match &table.kind {
EntryKind::Token { kind_id, .. } => { EntryKind::Token { kind_id, .. } => {
self.trap_writer.add_tuple( self.trap_writer.add_tuple(
&format!("{}_ast_node_info", self.language_prefix), &self.ast_node_info_table_name,
vec![ vec![
Arg::Label(id), trap::Arg::Label(id),
Arg::Label(parent_id), trap::Arg::Label(parent_id),
Arg::Int(parent_index), trap::Arg::Int(parent_index),
Arg::Label(loc), trap::Arg::Label(loc),
], ],
); );
self.trap_writer.add_tuple( self.trap_writer.add_tuple(
&format!("{}_tokeninfo", self.language_prefix), &self.tokeninfo_table_name,
vec![ vec![
Arg::Label(id), trap::Arg::Label(id),
Arg::Int(*kind_id), trap::Arg::Int(*kind_id),
sliced_source_arg(self.source, node), sliced_source_arg(self.source, node),
], ],
); );
@@ -425,15 +365,15 @@ impl Visitor<'_> {
} => { } => {
if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) { if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) {
self.trap_writer.add_tuple( self.trap_writer.add_tuple(
&format!("{}_ast_node_info", self.language_prefix), &self.ast_node_info_table_name,
vec![ vec![
Arg::Label(id), trap::Arg::Label(id),
Arg::Label(parent_id), trap::Arg::Label(parent_id),
Arg::Int(parent_index), trap::Arg::Int(parent_index),
Arg::Label(loc), trap::Arg::Label(loc),
], ],
); );
let mut all_args = vec![Arg::Label(id)]; let mut all_args = vec![trap::Arg::Label(id)];
all_args.extend(args); all_args.extend(args);
self.trap_writer.add_tuple(table_name, all_args); self.trap_writer.add_tuple(table_name, all_args);
} }
@@ -472,9 +412,9 @@ impl Visitor<'_> {
node: &Node, node: &Node,
fields: &[Field], fields: &[Field],
child_nodes: &[ChildNode], child_nodes: &[ChildNode],
parent_id: Label, parent_id: trap::Label,
) -> Option<Vec<Arg>> { ) -> Option<Vec<trap::Arg>> {
let mut map: Map<&Option<String>, (&Field, Vec<Arg>)> = Map::new(); let mut map: Map<&Option<String>, (&Field, Vec<trap::Arg>)> = Map::new();
for field in fields { for field in fields {
map.insert(&field.name, (field, Vec::new())); map.insert(&field.name, (field, Vec::new()));
} }
@@ -488,9 +428,9 @@ impl Visitor<'_> {
{ {
// We can safely unwrap because type_matches checks the key is in the map. // We can safely unwrap because type_matches checks the key is in the map.
let (int_value, _) = int_mapping.get(&child_node.type_name.kind).unwrap(); let (int_value, _) = int_mapping.get(&child_node.type_name.kind).unwrap();
values.push(Arg::Int(*int_value)); values.push(trap::Arg::Int(*int_value));
} else { } else {
values.push(Arg::Label(child_node.label)); values.push(trap::Arg::Label(child_node.label));
} }
} else if field.name.is_some() { } else if field.name.is_some() {
let error_message = format!( let error_message = format!(
@@ -569,9 +509,9 @@ impl Visitor<'_> {
); );
break; break;
} }
let mut args = vec![Arg::Label(parent_id)]; let mut args = vec![trap::Arg::Label(parent_id)];
if *has_index { if *has_index {
args.push(Arg::Int(index)) args.push(trap::Arg::Int(index))
} }
args.push(child_value.clone()); args.push(child_value.clone());
self.trap_writer.add_tuple(table_name, args); self.trap_writer.add_tuple(table_name, args);
@@ -625,9 +565,9 @@ impl Visitor<'_> {
} }
// Emit a slice of a source file as an Arg. // Emit a slice of a source file as an Arg.
fn sliced_source_arg(source: &[u8], n: Node) -> Arg { fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg {
let range = n.byte_range(); let range = n.byte_range();
Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned()) trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned())
} }
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated. // Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
@@ -699,59 +639,6 @@ fn traverse(tree: &Tree, visitor: &mut Visitor) {
} }
} }
pub struct Program(Vec<TrapEntry>);
impl fmt::Display for Program {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut text = String::new();
for trap_entry in &self.0 {
text.push_str(&format!("{}\n", trap_entry));
}
write!(f, "{}", text)
}
}
enum TrapEntry {
/// Maps the label to a fresh id, e.g. `#123=*`.
FreshId(Label),
/// Maps the label to a key, e.g. `#7=@"foo"`.
MapLabelToKey(Label, String),
/// foo_bar(arg*)
GenericTuple(String, Vec<Arg>),
Comment(String),
}
impl fmt::Display for TrapEntry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TrapEntry::FreshId(label) => write!(f, "{}=*", label),
TrapEntry::MapLabelToKey(label, key) => {
write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\""))
}
TrapEntry::GenericTuple(name, args) => {
write!(f, "{}(", name)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ",")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
TrapEntry::Comment(line) => write!(f, "// {}", line),
}
}
}
#[derive(Debug, Copy, Clone)]
// Identifiers of the form #0, #1...
struct Label(u32);
impl fmt::Display for Label {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "#{:x}", self.0)
}
}
// Numeric indices. // Numeric indices.
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
struct Index(usize); struct Index(usize);
@@ -761,69 +648,3 @@ impl fmt::Display for Index {
write!(f, "{}", self.0) write!(f, "{}", self.0)
} }
} }
// Some untyped argument to a TrapEntry.
#[derive(Debug, Clone)]
enum Arg {
Label(Label),
Int(usize),
String(String),
}
const MAX_STRLEN: usize = 1048576;
impl fmt::Display for Arg {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Arg::Label(x) => write!(f, "{}", x),
Arg::Int(x) => write!(f, "{}", x),
Arg::String(x) => write!(
f,
"\"{}\"",
limit_string(x, MAX_STRLEN).replace("\"", "\"\"")
),
}
}
}
/// Limit the length (in bytes) of a string. If the string's length in bytes is
/// less than or equal to the limit then the entire string is returned. Otherwise
/// the string is sliced at the provided limit. If there is a multi-byte character
/// at the limit then the returned slice will be slightly shorter than the limit to
/// avoid splitting that multi-byte character.
fn limit_string(string: &str, max_size: usize) -> &str {
if string.len() <= max_size {
return string;
}
let p = string.as_bytes();
let mut index = max_size;
// We want to clip the string at [max_size]; however, the character at that position
// may span several bytes. We need to find the first byte of the character. In UTF-8
// encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte.
// Therefore we decrement the index as long as there are bytes matching this pattern.
// This ensures we cut the string at the border between one character and another.
while index > 0 && (p[index] & 0b11000000) == 0b10000000 {
index -= 1;
}
&string[0..index]
}
#[test]
fn limit_string_test() {
assert_eq!("hello", limit_string(&"hello world".to_owned(), 5));
assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6));
assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5));
}
#[test]
fn escape_key_test() {
assert_eq!("foo!", escape_key("foo!"));
assert_eq!("foo&lbrace;&rbrace;", escape_key("foo{}"));
assert_eq!("&lbrace;&rbrace;", escape_key("{}"));
assert_eq!("", escape_key(""));
assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb"));
assert_eq!(
"/path/to/foo&amp;&lbrace;&rbrace;&quot;&commat;&num;.rb",
escape_key("/path/to/foo&{}\"@#.rb")
);
}

View File

@@ -1,4 +1,5 @@
mod extractor; mod extractor;
mod trap;
#[macro_use] #[macro_use]
extern crate lazy_static; extern crate lazy_static;
@@ -6,50 +7,13 @@ extern crate num_cpus;
use clap::arg; use clap::arg;
use encoding::{self}; use encoding::{self};
use flate2::write::GzEncoder;
use rayon::prelude::*; use rayon::prelude::*;
use std::borrow::Cow; use std::borrow::Cow;
use std::fs; use std::fs;
use std::io::{BufRead, BufWriter}; use std::io::BufRead;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tree_sitter::{Language, Parser, Range}; use tree_sitter::{Language, Parser, Range};
enum TrapCompression {
None,
Gzip,
}
impl TrapCompression {
fn from_env() -> TrapCompression {
match std::env::var("CODEQL_RUBY_TRAP_COMPRESSION") {
Ok(method) => match TrapCompression::from_string(&method) {
Some(c) => c,
None => {
tracing::error!("Unknown compression method '{}'; using gzip.", &method);
TrapCompression::Gzip
}
},
// Default compression method if the env var isn't set:
Err(_) => TrapCompression::Gzip,
}
}
fn from_string(s: &str) -> Option<TrapCompression> {
match s.to_lowercase().as_ref() {
"none" => Some(TrapCompression::None),
"gzip" => Some(TrapCompression::Gzip),
_ => None,
}
}
fn extension(&self) -> &str {
match self {
TrapCompression::None => "trap",
TrapCompression::Gzip => "trap.gz",
}
}
}
/** /**
* Gets the number of threads the extractor should use, by reading the * Gets the number of threads the extractor should use, by reading the
* CODEQL_THREADS environment variable and using it as described in the * CODEQL_THREADS environment variable and using it as described in the
@@ -141,7 +105,7 @@ fn main() -> std::io::Result<()> {
.value_of("output-dir") .value_of("output-dir")
.expect("missing --output-dir"); .expect("missing --output-dir");
let trap_dir = PathBuf::from(trap_dir); let trap_dir = PathBuf::from(trap_dir);
let trap_compression = TrapCompression::from_env(); let trap_compression = trap::Compression::from_env("CODEQL_RUBY_TRAP_COMPRESSION");
let file_list = matches.value_of("file-list").expect("missing --file-list"); let file_list = matches.value_of("file-list").expect("missing --file-list");
let file_list = fs::File::open(file_list)?; let file_list = fs::File::open(file_list)?;
@@ -165,7 +129,7 @@ fn main() -> std::io::Result<()> {
let mut source = std::fs::read(&path)?; let mut source = std::fs::read(&path)?;
let mut needs_conversion = false; let mut needs_conversion = false;
let code_ranges; let code_ranges;
let mut trap_writer = extractor::new_trap_writer(); let mut trap_writer = trap::Writer::new();
if path.extension().map_or(false, |x| x == "erb") { if path.extension().map_or(false, |x| x == "erb") {
tracing::info!("scanning: {}", path.display()); tracing::info!("scanning: {}", path.display());
extractor::extract( extractor::extract(
@@ -246,33 +210,25 @@ fn main() -> std::io::Result<()> {
} else { } else {
std::fs::copy(&path, &src_archive_file)?; std::fs::copy(&path, &src_archive_file)?;
} }
write_trap(&trap_dir, path, trap_writer, &trap_compression) write_trap(&trap_dir, path, &trap_writer, trap_compression)
}) })
.expect("failed to extract files"); .expect("failed to extract files");
let path = PathBuf::from("extras"); let path = PathBuf::from("extras");
let mut trap_writer = extractor::new_trap_writer(); let mut trap_writer = trap::Writer::new();
trap_writer.populate_empty_location(); extractor::populate_empty_location(&mut trap_writer);
write_trap(&trap_dir, path, trap_writer, &trap_compression) write_trap(&trap_dir, path, &trap_writer, trap_compression)
} }
fn write_trap( fn write_trap(
trap_dir: &Path, trap_dir: &Path,
path: PathBuf, path: PathBuf,
trap_writer: extractor::TrapWriter, trap_writer: &trap::Writer,
trap_compression: &TrapCompression, trap_compression: trap::Compression,
) -> std::io::Result<()> { ) -> std::io::Result<()> {
let trap_file = path_for(trap_dir, &path, trap_compression.extension()); let trap_file = path_for(trap_dir, &path, trap_compression.extension());
std::fs::create_dir_all(&trap_file.parent().unwrap())?; std::fs::create_dir_all(&trap_file.parent().unwrap())?;
let trap_file = std::fs::File::create(&trap_file)?; trap_writer.write_to_file(&trap_file, trap_compression)
let mut trap_file = BufWriter::new(trap_file);
match trap_compression {
TrapCompression::None => trap_writer.output(&mut trap_file),
TrapCompression::Gzip => {
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
trap_writer.output(&mut compressed_writer)
}
}
} }
fn scan_erb( fn scan_erb(

ruby/extractor/src/trap.rs (new file, 272 lines)
View File

@@ -0,0 +1,272 @@
use std::borrow::Cow;
use std::fmt;
use std::io::{BufWriter, Write};
use std::path::Path;
use flate2::write::GzEncoder;
pub struct Writer {
/// The accumulated trap entries
trap_output: Vec<Entry>,
/// A counter for generating fresh labels
counter: u32,
/// cache of global keys
global_keys: std::collections::HashMap<String, Label>,
}
impl Writer {
pub fn new() -> Writer {
Writer {
counter: 0,
trap_output: Vec::new(),
global_keys: std::collections::HashMap::new(),
}
}
pub fn fresh_id(&mut self) -> Label {
let label = Label(self.counter);
self.counter += 1;
self.trap_output.push(Entry::FreshId(label));
label
}
/// Gets a label that will hold the unique ID of the passed string at import time.
/// This can be used for incrementally importable TRAP files -- use globally unique
/// strings to compute a unique ID for table tuples.
///
/// Note: You probably want to make sure that the key strings that you use are disjoint
/// for disjoint column types; the standard way of doing this is to prefix (or append)
/// the column type name to the ID. Thus, you might identify methods in Java by the
/// full ID "methods_com.method.package.DeclaringClass.method(argumentList)".
pub fn global_id(&mut self, key: &str) -> (Label, bool) {
if let Some(label) = self.global_keys.get(key) {
return (*label, false);
}
let label = Label(self.counter);
self.counter += 1;
self.global_keys.insert(key.to_owned(), label);
self.trap_output
.push(Entry::MapLabelToKey(label, key.to_owned()));
(label, true)
}
pub fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
self.trap_output
.push(Entry::GenericTuple(table_name.to_owned(), args))
}
pub fn comment(&mut self, text: String) {
self.trap_output.push(Entry::Comment(text));
}
pub fn write_to_file(&self, path: &Path, compression: Compression) -> std::io::Result<()> {
let trap_file = std::fs::File::create(path)?;
let mut trap_file = BufWriter::new(trap_file);
match compression {
Compression::None => self.write_trap_entries(&mut trap_file),
Compression::Gzip => {
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
self.write_trap_entries(&mut compressed_writer)
}
}
}
fn write_trap_entries<W: Write>(&self, file: &mut W) -> std::io::Result<()> {
for trap_entry in &self.trap_output {
writeln!(file, "{}", trap_entry)?;
}
std::io::Result::Ok(())
}
}
pub enum Entry {
/// Maps the label to a fresh id, e.g. `#123=*`.
FreshId(Label),
/// Maps the label to a key, e.g. `#7=@"foo"`.
MapLabelToKey(Label, String),
/// foo_bar(arg*)
GenericTuple(String, Vec<Arg>),
Comment(String),
}
impl fmt::Display for Entry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Entry::FreshId(label) => write!(f, "{}=*", label),
Entry::MapLabelToKey(label, key) => {
write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\""))
}
Entry::GenericTuple(name, args) => {
write!(f, "{}(", name)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ",")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
Entry::Comment(line) => write!(f, "// {}", line),
}
}
}
#[derive(Debug, Copy, Clone)]
// Identifiers of the form #0, #1...
pub struct Label(u32);
impl fmt::Display for Label {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "#{:x}", self.0)
}
}
// Some untyped argument to a TrapEntry.
#[derive(Debug, Clone)]
pub enum Arg {
Label(Label),
Int(usize),
String(String),
}
const MAX_STRLEN: usize = 1048576;
impl fmt::Display for Arg {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Arg::Label(x) => write!(f, "{}", x),
Arg::Int(x) => write!(f, "{}", x),
Arg::String(x) => write!(
f,
"\"{}\"",
limit_string(x, MAX_STRLEN).replace("\"", "\"\"")
),
}
}
}
pub struct Program(Vec<Entry>);
impl fmt::Display for Program {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut text = String::new();
for trap_entry in &self.0 {
text.push_str(&format!("{}\n", trap_entry));
}
write!(f, "{}", text)
}
}
pub fn full_id_for_file(normalized_path: &str) -> String {
format!("{};sourcefile", escape_key(normalized_path))
}
pub fn full_id_for_folder(normalized_path: &str) -> String {
format!("{};folder", escape_key(normalized_path))
}
/// Escapes a string for use in a TRAP key, by replacing special characters with
/// HTML entities.
fn escape_key<'a, S: Into<Cow<'a, str>>>(key: S) -> Cow<'a, str> {
fn needs_escaping(c: char) -> bool {
matches!(c, '&' | '{' | '}' | '"' | '@' | '#')
}
let key = key.into();
if key.contains(needs_escaping) {
let mut escaped = String::with_capacity(2 * key.len());
for c in key.chars() {
match c {
'&' => escaped.push_str("&amp;"),
'{' => escaped.push_str("&lbrace;"),
'}' => escaped.push_str("&rbrace;"),
'"' => escaped.push_str("&quot;"),
'@' => escaped.push_str("&commat;"),
'#' => escaped.push_str("&num;"),
_ => escaped.push(c),
}
}
Cow::Owned(escaped)
} else {
key
}
}
/// Limit the length (in bytes) of a string. If the string's length in bytes is
/// less than or equal to the limit then the entire string is returned. Otherwise
/// the string is sliced at the provided limit. If there is a multi-byte character
/// at the limit then the returned slice will be slightly shorter than the limit to
/// avoid splitting that multi-byte character.
fn limit_string(string: &str, max_size: usize) -> &str {
if string.len() <= max_size {
return string;
}
let p = string.as_bytes();
let mut index = max_size;
// We want to clip the string at [max_size]; however, the character at that position
// may span several bytes. We need to find the first byte of the character. In UTF-8
// encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte.
// Therefore we decrement the index as long as there are bytes matching this pattern.
// This ensures we cut the string at the border between one character and another.
while index > 0 && (p[index] & 0b11000000) == 0b10000000 {
index -= 1;
}
&string[0..index]
}
#[derive(Clone, Copy)]
pub enum Compression {
None,
Gzip,
}
impl Compression {
pub fn from_env(var_name: &str) -> Compression {
match std::env::var(var_name) {
Ok(method) => match Compression::from_string(&method) {
Some(c) => c,
None => {
tracing::error!("Unknown compression method '{}'; using gzip.", &method);
Compression::Gzip
}
},
// Default compression method if the env var isn't set:
Err(_) => Compression::Gzip,
}
}
pub fn from_string(s: &str) -> Option<Compression> {
match s.to_lowercase().as_ref() {
"none" => Some(Compression::None),
"gzip" => Some(Compression::Gzip),
_ => None,
}
}
pub fn extension(&self) -> &str {
match self {
Compression::None => "trap",
Compression::Gzip => "trap.gz",
}
}
}
#[test]
fn limit_string_test() {
assert_eq!("hello", limit_string(&"hello world".to_owned(), 5));
assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6));
assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5));
}
#[test]
fn escape_key_test() {
assert_eq!("foo!", escape_key("foo!"));
assert_eq!("foo&lbrace;&rbrace;", escape_key("foo{}"));
assert_eq!("&lbrace;&rbrace;", escape_key("{}"));
assert_eq!("", escape_key(""));
assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb"));
assert_eq!(
"/path/to/foo&amp;&lbrace;&rbrace;&quot;&commat;&num;.rb",
escape_key("/path/to/foo&{}\"@#.rb")
);
}
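The `trap` module above is consumed by `extractor.rs` and `main.rs`. As a rough usage sketch, assuming it is compiled inside this extractor crate (the file path and key below are illustrative, not taken from the PR), the typical flow is: create a `Writer`, obtain labels with `global_id`/`fresh_id`, emit rows with `add_tuple`, and serialize with `write_to_file` using the compression chosen by `Compression::from_env`.

```rust
use std::path::Path;

// Rough sketch of driving the trap::Writer API; assumes this function lives
// at the root of the extractor crate, next to the `mod trap;` declaration.
// The file path and key are illustrative, not taken from the PR.
fn write_example_trap() -> std::io::Result<()> {
    let mut writer = trap::Writer::new();
    writer.comment("Auto-generated TRAP file for example.rb".to_string());

    // `global_id` deduplicates keys: the same key always maps to the same
    // label, and `fresh` is true only the first time the key is seen.
    let (file_label, fresh) = writer.global_id(&trap::full_id_for_file("example.rb"));
    if fresh {
        writer.add_tuple(
            "files",
            vec![
                trap::Arg::Label(file_label),
                trap::Arg::String("example.rb".to_string()),
            ],
        );
    }

    // Defaults to gzip when the environment variable is unset or unknown.
    let compression = trap::Compression::from_env("CODEQL_RUBY_TRAP_COMPRESSION");
    writer.write_to_file(Path::new("example.trap.gz"), compression)
}
```

With `Compression::None`, the emitted text would contain lines such as `// Auto-generated TRAP file for example.rb`, `#0=@"example.rb;sourcefile"`, and `files(#0,"example.rb")`, matching the `Display` implementations above.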

View File

@@ -12,4 +12,4 @@ node-types = { path = "../node-types" }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }
 tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "1a538da253d73f896b9f6c0c7d79cda58791ac5c" }
-tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "5b305c3cd32db10494cedd2743de6bbe32f1a573" }
+tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "e75d04404c9dd71ad68850d5c672b226d5e694f3" }

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
- Calls to `ActiveRecord::Relation#annotate` are now recognized as `SqlExecution`s, so they are considered sinks for queries such as rb/sql-injection.

View File

@@ -133,6 +133,11 @@ private Expr sqlFragmentArgument(MethodCall call) {
       or
       methodName = "reload" and
       result = call.getKeywordArgument("lock")
+      or
+      // Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to
+      // SQLi if user supplied input is passed in as an argument.
+      methodName = "annotate" and
+      result = call.getArgument(_)
     )
   )
 }

View File

@@ -2,6 +2,7 @@ actionControllerControllerClasses
 | ActiveRecord.rb:23:1:39:3 | FooController |
 | ActiveRecord.rb:41:1:64:3 | BarController |
 | ActiveRecord.rb:66:1:70:3 | BazController |
+| ActiveRecord.rb:72:1:80:3 | AnnotatedController |
 | app/controllers/comments_controller.rb:1:1:7:3 | CommentsController |
 | app/controllers/foo/bars_controller.rb:3:1:39:3 | BarsController |
 | app/controllers/photos_controller.rb:1:1:4:3 | PhotosController |
@@ -12,6 +13,8 @@ actionControllerActionMethods
 | ActiveRecord.rb:42:3:47:5 | some_other_request_handler |
 | ActiveRecord.rb:49:3:63:5 | safe_paths |
 | ActiveRecord.rb:67:3:69:5 | yet_another_handler |
+| ActiveRecord.rb:73:3:75:5 | index |
+| ActiveRecord.rb:77:3:79:5 | unsafe_action |
 | app/controllers/comments_controller.rb:2:3:3:5 | index |
 | app/controllers/comments_controller.rb:5:3:6:5 | show |
 | app/controllers/foo/bars_controller.rb:5:3:7:5 | index |
@@ -38,6 +41,7 @@ paramsCalls
 | ActiveRecord.rb:59:12:59:17 | call to params |
 | ActiveRecord.rb:62:15:62:20 | call to params |
 | ActiveRecord.rb:68:21:68:26 | call to params |
+| ActiveRecord.rb:78:59:78:64 | call to params |
 | app/controllers/foo/bars_controller.rb:13:21:13:26 | call to params |
 | app/controllers/foo/bars_controller.rb:14:10:14:15 | call to params |
 | app/controllers/foo/bars_controller.rb:21:21:21:26 | call to params |
@@ -57,6 +61,7 @@ paramsSources
 | ActiveRecord.rb:59:12:59:17 | call to params |
 | ActiveRecord.rb:62:15:62:20 | call to params |
 | ActiveRecord.rb:68:21:68:26 | call to params |
+| ActiveRecord.rb:78:59:78:64 | call to params |
 | app/controllers/foo/bars_controller.rb:13:21:13:26 | call to params |
 | app/controllers/foo/bars_controller.rb:14:10:14:15 | call to params |
 | app/controllers/foo/bars_controller.rb:21:21:21:26 | call to params |

View File

@@ -22,6 +22,7 @@ activeRecordSqlExecutionRanges
 | ActiveRecord.rb:46:20:46:32 | ... + ... |
 | ActiveRecord.rb:52:16:52:28 | "name #{...}" |
 | ActiveRecord.rb:56:20:56:39 | "username = #{...}" |
+| ActiveRecord.rb:78:27:78:76 | "this is an unsafe annotation:..." |
 activeRecordModelClassMethodCalls
 | ActiveRecord.rb:2:3:2:17 | call to has_many |
 | ActiveRecord.rb:6:3:6:24 | call to belongs_to |
@@ -44,6 +45,8 @@ activeRecordModelClassMethodCalls
 | ActiveRecord.rb:60:5:60:33 | call to find_by |
 | ActiveRecord.rb:62:5:62:34 | call to find |
 | ActiveRecord.rb:68:5:68:45 | call to delete_by |
+| ActiveRecord.rb:74:13:74:54 | call to annotate |
+| ActiveRecord.rb:78:13:78:77 | call to annotate |
 potentiallyUnsafeSqlExecutingMethodCall
 | ActiveRecord.rb:9:5:9:68 | call to find |
 | ActiveRecord.rb:19:5:19:25 | call to destroy_by |
@@ -55,6 +58,7 @@ potentiallyUnsafeSqlExecutingMethodCall
 | ActiveRecord.rb:46:5:46:33 | call to delete_by |
 | ActiveRecord.rb:52:5:52:29 | call to order |
 | ActiveRecord.rb:56:7:56:40 | call to find_by |
+| ActiveRecord.rb:78:13:78:77 | call to annotate |
 activeRecordModelInstantiations
 | ActiveRecord.rb:9:5:9:68 | call to find | ActiveRecord.rb:5:1:15:3 | User |
 | ActiveRecord.rb:13:5:13:40 | call to find_by | ActiveRecord.rb:1:1:3:3 | UserGroup |

View File

@@ -68,3 +68,13 @@ class BazController < BarController
     Admin.delete_by(params[:admin_condition])
   end
 end
+
+class AnnotatedController < ActionController::Base
+  def index
+    users = User.annotate("this is a safe annotation")
+  end
+
+  def unsafe_action
+    users = User.annotate("this is an unsafe annotation:#{params[:comment]}")
+  end
+end

View File

@@ -137,3 +137,17 @@ class BazController < BarController
     Admin.delete_by(params[:admin_condition])
   end
 end
+
+class AnnotatedController < ActionController::Base
+  def index
+    name = params[:user_name]
+    # GOOD: string literal arguments not controlled by the user are safe for annotations
+    users = User.annotate("this is a safe annotation").find_by(user_name: name)
+  end
+
+  def unsafe_action
+    name = params[:user_name]
+    # BAD: user input passed into annotations is vulnerable to SQLi
+    users = User.annotate("this is an unsafe annotation:#{params[:comment]}").find_by(user_name: name)
+  end
+end

View File

@@ -31,6 +31,8 @@ edges
 | ActiveRecordInjection.rb:99:11:99:17 | ...[...] : | ActiveRecordInjection.rb:104:20:104:32 | ... + ... |
 | ActiveRecordInjection.rb:137:21:137:26 | call to params : | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : |
 | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | ActiveRecordInjection.rb:20:22:20:30 | condition : |
+| ActiveRecordInjection.rb:151:59:151:64 | call to params : | ActiveRecordInjection.rb:151:59:151:74 | ...[...] : |
+| ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." |
 nodes
 | ActiveRecordInjection.rb:8:25:8:28 | name : | semmle.label | name : |
 | ActiveRecordInjection.rb:8:31:8:34 | pass : | semmle.label | pass : |
@@ -80,6 +82,9 @@ nodes
 | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | semmle.label | ... + ... |
 | ActiveRecordInjection.rb:137:21:137:26 | call to params : | semmle.label | call to params : |
 | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | semmle.label | ...[...] : |
+| ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | semmle.label | "this is an unsafe annotation:..." |
+| ActiveRecordInjection.rb:151:59:151:64 | call to params : | semmle.label | call to params : |
+| ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | semmle.label | ...[...] : |
 subpaths
 #select
 | ActiveRecordInjection.rb:10:33:10:67 | "name='#{...}' and pass='#{...}'" | ActiveRecordInjection.rb:70:23:70:28 | call to params : | ActiveRecordInjection.rb:10:33:10:67 | "name='#{...}' and pass='#{...}'" | This SQL query depends on $@. | ActiveRecordInjection.rb:70:23:70:28 | call to params | a user-provided value |
@@ -99,3 +104,4 @@ subpaths
 | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | ActiveRecordInjection.rb:88:18:88:23 | call to params : | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | This SQL query depends on $@. | ActiveRecordInjection.rb:88:18:88:23 | call to params | a user-provided value |
 | ActiveRecordInjection.rb:92:21:92:35 | ...[...] | ActiveRecordInjection.rb:92:21:92:26 | call to params : | ActiveRecordInjection.rb:92:21:92:35 | ...[...] | This SQL query depends on $@. | ActiveRecordInjection.rb:92:21:92:26 | call to params | a user-provided value |
 | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | ActiveRecordInjection.rb:98:10:98:15 | call to params : | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | This SQL query depends on $@. | ActiveRecordInjection.rb:98:10:98:15 | call to params | a user-provided value |
+| ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | ActiveRecordInjection.rb:151:59:151:64 | call to params : | ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | This SQL query depends on $@. | ActiveRecordInjection.rb:151:59:151:64 | call to params | a user-provided value |
View File
@@ -41,59 +41,52 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration {
} }
override predicate isSink(DataFlow::Node node, string flowstate) { override predicate isSink(DataFlow::Node node, string flowstate) {
// arguments to method calls...
exists( exists(
string className, string methodName, string paramName, ClassDecl c, AbstractFunctionDecl f, AbstractFunctionDecl funcDecl, CallExpr call, string funcName, string paramName, int arg
CallExpr call, int arg
| |
(
// arguments to method calls...
exists(string className, ClassDecl c |
( (
// `NSRange.init` // `NSRange.init`
className = "NSRange" and className = "NSRange" and
methodName = "init(location:length:)" and funcName = "init(location:length:)" and
paramName = ["location", "length"] paramName = ["location", "length"]
or or
// `NSString.character` // `NSString.character`
className = ["NSString", "NSMutableString"] and className = ["NSString", "NSMutableString"] and
methodName = "character(at:)" and funcName = "character(at:)" and
paramName = "at" paramName = "at"
or or
// `NSString.substring` // `NSString.substring`
className = ["NSString", "NSMutableString"] and className = ["NSString", "NSMutableString"] and
methodName = "substring(from:)" and funcName = "substring(from:)" and
paramName = "from" paramName = "from"
or or
// `NSString.substring` // `NSString.substring`
className = ["NSString", "NSMutableString"] and className = ["NSString", "NSMutableString"] and
methodName = "substring(to:)" and funcName = "substring(to:)" and
paramName = "to" paramName = "to"
or or
// `NSMutableString.insert` // `NSMutableString.insert`
className = "NSMutableString" and className = "NSMutableString" and
methodName = "insert(_:at:)" and funcName = "insert(_:at:)" and
paramName = "at" paramName = "at"
) and ) and
c.getName() = className and c.getName() = className and
c.getAMember() = f and // TODO: will this even work if it's defined in a parent class? c.getAMember() = funcDecl and
call.getFunction().(ApplyExpr).getStaticTarget() = f and call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and
f.getName() = methodName and
f.getParam(pragma[only_bind_into](arg)).getName() = paramName and
call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and
flowstate = "String" // `String` length flowing into `NSString` flowstate = "String" // `String` length flowing into `NSString`
) )
or or
// arguments to function calls... // arguments to function calls...
exists(string funcName, string paramName, CallExpr call, int arg |
// `NSMakeRange` // `NSMakeRange`
funcName = "NSMakeRange(_:_:)" and funcName = "NSMakeRange(_:_:)" and
paramName = ["loc", "len"] and paramName = ["loc", "len"] and
call.getStaticTarget().getName() = funcName and call.getStaticTarget() = funcDecl and
call.getStaticTarget().getParam(pragma[only_bind_into](arg)).getName() = paramName and
call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and
flowstate = "String" // `String` length flowing into `NSString` flowstate = "String" // `String` length flowing into `NSString`
)
or or
// arguments to function calls... // arguments to method calls...
exists(string funcName, string paramName, CallExpr call, int arg |
( (
// `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast`
funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and
@@ -115,14 +108,13 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration {
funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and
paramName = "distance" paramName = "distance"
) and ) and
call.getFunction().(ApplyExpr).getStaticTarget().getName() = funcName and call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and
call.getFunction()
.(ApplyExpr)
.getStaticTarget()
.getParam(pragma[only_bind_into](arg))
.getName() = paramName and
call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and
flowstate = "NSString" // `NSString` length flowing into `String` flowstate = "NSString" // `NSString` length flowing into `String`
) and
// match up `funcName`, `paramName`, `arg`, `node`.
funcDecl.getName() = funcName and
funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and
call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr()
) )
} }
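For orientation (a hypothetical sketch, not part of this commit): the refactored `isSink` predicate above matches arguments where a Swift `String` length (`.count`, counted in extended grapheme clusters) flows into `NSString`-style APIs that work in UTF-16 offsets, and the reverse direction where `NSString.length` flows into `String` APIs. The function and variable names below are illustrative only.

import Foundation

// `String` length (`.count`) flowing into NSString-based APIs ("String" flow state).
func sinkSketch(s: String, ns: NSString, mns: NSMutableString) {
    let r1 = NSRange(location: 0, length: s.count)   // `NSRange.init(location:length:)`
    let ch = ns.character(at: s.count - 1)           // `NSString.character(at:)`
    let tail = ns.substring(from: s.count)           // `NSString.substring(from:)`
    mns.insert("!", at: s.count)                     // `NSMutableString.insert(_:at:)`
    let r2 = NSMakeRange(0, s.count)                 // free function `NSMakeRange(_:_:)`

    // Reverse direction ("NSString" flow state): `NSString.length` (UTF-16 units)
    // flowing into a `String` API such as `dropFirst(_:)`.
    let trimmed = s.dropFirst(ns.length)

    print(r1, ch, tail, r2, trimmed)
}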
View File
@@ -1,4 +1,5 @@
edges edges
| StringLengthConflation2.swift:37:34:37:36 | .count : | StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... |
| StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | | StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... |
| StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | | StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... |
| StringLengthConflation.swift:93:28:93:31 | .length : | StringLengthConflation.swift:93:28:93:40 | ... call to -(_:_:) ... | | StringLengthConflation.swift:93:28:93:31 | .length : | StringLengthConflation.swift:93:28:93:40 | ... call to -(_:_:) ... |
@@ -15,6 +16,8 @@ edges
| StringLengthConflation.swift:135:36:135:38 | .count : | StringLengthConflation.swift:135:36:135:46 | ... call to -(_:_:) ... | | StringLengthConflation.swift:135:36:135:38 | .count : | StringLengthConflation.swift:135:36:135:46 | ... call to -(_:_:) ... |
| StringLengthConflation.swift:141:28:141:30 | .count : | StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... | | StringLengthConflation.swift:141:28:141:30 | .count : | StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... |
nodes nodes
| StringLengthConflation2.swift:37:34:37:36 | .count : | semmle.label | .count : |
| StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... |
| StringLengthConflation.swift:53:43:53:46 | .length | semmle.label | .length | | StringLengthConflation.swift:53:43:53:46 | .length | semmle.label | .length |
| StringLengthConflation.swift:60:47:60:50 | .length : | semmle.label | .length : | | StringLengthConflation.swift:60:47:60:50 | .length : | semmle.label | .length : |
| StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | semmle.label | ... call to /(_:_:) ... | | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | semmle.label | ... call to /(_:_:) ... |
@@ -50,6 +53,7 @@ nodes
| StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... | | StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... |
subpaths subpaths
#select #select
| StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | StringLengthConflation2.swift:37:34:37:36 | .count : | StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | This String length is used in an NSString, but it may not be equivalent. |
| StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | This NSString length is used in a String, but it may not be equivalent. | | StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | This NSString length is used in a String, but it may not be equivalent. |
| StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. | | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. |
| StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. | | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. |
View File
@@ -0,0 +1,42 @@
// This test is in a separate file because we want to test with a slightly different library
// implementation. In this version, some of the functions of `NSString` are in fact implemented
// in a base class `NSStringBase`.
// --- stubs ---
func print(_ items: Any...) {}
typealias unichar = UInt16
class NSObject
{
}
class NSStringBase : NSObject
{
func substring(from: Int) -> String { return "" }
}
class NSString : NSStringBase
{
init(string: String) { length = string.count }
func substring(to: Int) -> String { return "" }
private(set) var length: Int
}
// --- tests ---
func test(s: String) {
let ns = NSString(string: s)
let nstr1 = ns.substring(from: ns.length - 1) // GOOD
let nstr2 = ns.substring(from: s.count - 1) // BAD: String length used in NSString [NOT DETECTED]
let nstr3 = ns.substring(to: ns.length - 1) // GOOD
let nstr4 = ns.substring(to: s.count - 1) // BAD: String length used in NSString
print("substrings '\(nstr1)' '\(nstr2)' / '\(nstr3)' '\(nstr4)'")
}
// `begin :thumbsup: end`, with a thumbs-up emoji and a skin tone modifier
test(s: "begin \u{0001F44D}\u{0001F3FF} end")
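As a closing aside (a sketch, not part of this commit): the test string matters because Swift's `String.count` counts extended grapheme clusters while `NSString.length` counts UTF-16 code units, so the thumbs-up emoji plus skin tone modifier is a single `Character` but four UTF-16 units. Assuming a recent Swift toolchain with Foundation:

import Foundation

let s = "begin \u{0001F44D}\u{0001F3FF} end"
print(s.count)                // 11: "begin " (6) + one emoji grapheme cluster (1) + " end" (4)
print(s.utf16.count)          // 14: each of the two emoji scalars occupies 2 UTF-16 code units
print((s as NSString).length) // 14: NSString.length is defined in UTF-16 code units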