Merge branch 'main' into encoding

Arthur Baars authored 2022-07-21 21:21:17 +02:00, committed by GitHub
20 changed files with 694 additions and 486 deletions

View File

@@ -77,7 +77,7 @@ class ExecState extends DataFlow::FlowState {
ExecState() {
this =
"ExecState (" + fst.getLocation() + " | " + fst + ", " + snd.getLocation() + " | " + snd + ")" and
interestingConcatenation(fst, snd)
interestingConcatenation(pragma[only_bind_into](fst), pragma[only_bind_into](snd))
}
DataFlow::Node getFstNode() { result = fst }

View File

@@ -2,7 +2,54 @@ function RegisterExtractorPack(id)
local extractor = GetPlatformToolsDirectory() ..
'Semmle.Extraction.CSharp.Driver'
if OperatingSystem == 'windows' then extractor = extractor .. '.exe' end
function DotnetMatcherBuild(compilerName, compilerPath, compilerArguments,
_languageId)
if compilerName ~= 'dotnet' and compilerName ~= 'dotnet.exe' then
return nil
end
-- The dotnet CLI has the following usage instructions:
-- dotnet [sdk-options] [command] [command-options] [arguments]
-- we are interested in dotnet build, which has the following usage instructions:
-- dotnet [options] build [<PROJECT | SOLUTION>...]
-- For now, parse the command line as follows:
-- Everything that starts with `-` (or `/`) will be ignored.
-- The first non-option argument is treated as the command.
-- if that's `build`, we append `/p:UseSharedCompilation=false` to the command line,
-- otherwise we do nothing.
local match = false
local argv = compilerArguments.argv
if OperatingSystem == 'windows' then
-- let's hope that this split matches the escaping rules `dotnet` applies to command line arguments
-- or, at least, that it is close enough
argv =
NativeArgumentsToArgv(compilerArguments.nativeArgumentPointer)
end
for i, arg in ipairs(argv) do
-- dotnet options start with either - or / (both are legal)
local firstCharacter = string.sub(arg, 1, 1)
if not (firstCharacter == '-') and not (firstCharacter == '/') then
Log(1, 'Dotnet subcommand detected: %s', arg)
if arg == 'build' then match = true end
break
end
end
if match then
return {
order = ORDER_REPLACE,
invocation = BuildExtractorInvocation(id, compilerPath,
compilerPath,
compilerArguments, nil, {
'/p:UseSharedCompilation=false'
})
}
end
return nil
end
local windowsMatchers = {
DotnetMatcherBuild,
CreatePatternMatcher({'^dotnet%.exe$'}, MatchCompilerName, extractor, {
prepend = {'--dotnetexec', '--cil'},
order = ORDER_BEFORE
@@ -10,22 +57,21 @@ function RegisterExtractorPack(id)
CreatePatternMatcher({'^csc.*%.exe$'}, MatchCompilerName, extractor, {
prepend = {'--compiler', '"${compiler}"', '--cil'},
order = ORDER_BEFORE
}),
CreatePatternMatcher({'^fakes.*%.exe$', 'moles.*%.exe'},
MatchCompilerName, nil, {trace = false})
}
local posixMatchers = {
CreatePatternMatcher({'^mcs%.exe$', '^csc%.exe$'}, MatchCompilerName,
extractor, {
prepend = {'--compiler', '"${compiler}"', '--cil'},
order = ORDER_BEFORE
}),
DotnetMatcherBuild,
CreatePatternMatcher({'^mono', '^dotnet$'}, MatchCompilerName,
extractor, {
prepend = {'--dotnetexec', '--cil'},
order = ORDER_BEFORE
}),
CreatePatternMatcher({'^mcs%.exe$', '^csc%.exe$'}, MatchCompilerName,
extractor, {
prepend = {'--compiler', '"${compiler}"', '--cil'},
order = ORDER_BEFORE
}), function(compilerName, compilerPath, compilerArguments, _languageId)
if MatchCompilerName('^msbuild$', compilerName, compilerPath,
compilerArguments) or
@@ -49,7 +95,6 @@ function RegisterExtractorPack(id)
else
return posixMatchers
end
end
-- Return a list of minimum supported versions of the configuration file format
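The comment block in DotnetMatcherBuild above describes the command-line scan: every argument starting with `-` or `/` is skipped, the first remaining argument is taken as the dotnet subcommand, and only `build` triggers appending `/p:UseSharedCompilation=false`. A minimal Rust sketch of that same rule, purely illustrative (the real matcher is the Lua function above; the helper name is made up):

// Returns the first argument that is not an option, i.e. the presumed dotnet
// subcommand; None means the invocation was options-only.
fn dotnet_subcommand<'a>(argv: &[&'a str]) -> Option<&'a str> {
    argv.iter()
        .copied()
        .find(|arg| !arg.starts_with('-') && !arg.starts_with('/'))
}

fn main() {
    // Only a `build` subcommand should lead to /p:UseSharedCompilation=false being appended.
    assert_eq!(dotnet_subcommand(&["--verbosity", "m", "build", "My.sln"]), Some("build"));
    assert_eq!(dotnet_subcommand(&["/p:Foo=1", "run"]), Some("run"));
    assert_eq!(dotnet_subcommand(&["--info"]), None);
}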

View File

@@ -11,14 +11,17 @@ CodeQL.
Languages and compilers
#######################
CodeQL supports the following languages and compilers.
The current versions of the CodeQL CLI (`changelog <https://github.com/github/codeql-cli-binaries/blob/main/CHANGELOG.md>`__, `releases <https://github.com/github/codeql-cli-binaries/releases>`__),
CodeQL library packs (`source <https://github.com/github/codeql/tree/codeql-cli/latest>`__),
and CodeQL bundle (`releases <https://github.com/github/codeql-action/releases>`__)
support the following languages and compilers.
.. include:: ../support/reusables/versions-compilers.rst
Frameworks and libraries
########################
The libraries and queries in the current version of CodeQL have been explicitly checked against the libraries and frameworks listed below.
The current versions of the CodeQL library and query packs (`source <https://github.com/github/codeql/tree/codeql-cli/latest>`__) have been explicitly checked against the libraries and frameworks listed below.
.. pull-quote::

View File

@@ -1,6 +1,10 @@
C and C++ built-in support
================================
Provided by the current versions of the
CodeQL query pack ``codeql/cpp-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/cpp/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/cpp/ql/src>`__)
and the CodeQL library pack ``codeql/cpp-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/cpp/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/cpp/ql/lib>`__).
.. csv-table::
:header-rows: 1
:class: fullWidthTable
@@ -14,6 +18,10 @@ C and C++ built-in support
C# built-in support
================================
Provided by the current versions of the
CodeQL query pack ``codeql/csharp-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/csharp/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/csharp/ql/src>`__)
and the CodeQL library pack ``codeql/csharp-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/csharp/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/csharp/ql/lib>`__).
.. csv-table::
:header-rows: 1
:class: fullWidthTable
@@ -33,6 +41,10 @@ C# built-in support
Go built-in support
================================
Provided by the current versions of the
CodeQL query pack ``codeql/go-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/go/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/go/ql/src>`__)
and the CodeQL library pack ``codeql/go-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/go/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/go/ql/lib>`__).
.. csv-table::
:header-rows: 1
:class: fullWidthTable
@@ -84,6 +96,10 @@ Go built-in support
Java built-in support
==================================
Provided by the current versions of the
CodeQL query pack ``codeql/java-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/java/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/java/ql/src>`__)
and the CodeQL library pack ``codeql/java-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/java/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/java/ql/lib>`__).
.. csv-table::
:header-rows: 1
:class: fullWidthTable
@@ -113,6 +129,10 @@ Java built-in support
JavaScript and TypeScript built-in support
=======================================================
Provided by the current versions of the
CodeQL query pack ``codeql/javascript-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/javascript/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/javascript/ql/src>`__)
and the CodeQL library pack ``codeql/javascript-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/javascript/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/javascript/ql/lib>`__).
.. csv-table::
:header-rows: 1
:class: fullWidthTable
@@ -156,6 +176,10 @@ JavaScript and TypeScript built-in support
Python built-in support
====================================
Provided by the current versions of the
CodeQL query pack ``codeql/python-queries`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/python/ql/src/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/python/ql/src>`__)
and the CodeQL library pack ``codeql/python-all`` (`changelog <https://github.com/github/codeql/tree/codeql-cli/latest/python/ql/lib/CHANGELOG.md>`__, `source <https://github.com/github/codeql/tree/codeql-cli/latest/python/ql/lib>`__).
.. csv-table::
:header-rows: 1
:class: fullWidthTable

ruby/Cargo.lock (generated; binary file not shown)

View File

@@ -11,7 +11,7 @@ flate2 = "1.0"
node-types = { path = "../node-types" }
tree-sitter = "0.19"
tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "1a538da253d73f896b9f6c0c7d79cda58791ac5c" }
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "5b305c3cd32db10494cedd2743de6bbe32f1a573" }
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "e75d04404c9dd71ad68850d5c672b226d5e694f3" }
clap = "3.0"
tracing = "0.1"
tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }

View File

@@ -1,161 +1,112 @@
use crate::trap;
use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName};
use std::borrow::Cow;
use std::collections::BTreeMap as Map;
use std::collections::BTreeSet as Set;
use std::fmt;
use std::io::Write;
use std::path::Path;
use tracing::{error, info, span, Level};
use tree_sitter::{Language, Node, Parser, Range, Tree};
pub struct TrapWriter {
/// The accumulated trap entries
trap_output: Vec<TrapEntry>,
/// A counter for generating fresh labels
counter: u32,
/// cache of global keys
global_keys: std::collections::HashMap<String, Label>,
pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label {
let (file_label, fresh) =
writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path)));
if fresh {
writer.add_tuple(
"files",
vec![
trap::Arg::Label(file_label),
trap::Arg::String(normalize_path(absolute_path)),
],
);
populate_parent_folders(writer, file_label, absolute_path.parent());
}
file_label
}
pub fn new_trap_writer() -> TrapWriter {
TrapWriter {
counter: 0,
trap_output: Vec::new(),
global_keys: std::collections::HashMap::new(),
fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label {
let (file_label, fresh) = writer.global_id("empty;sourcefile");
if fresh {
writer.add_tuple(
"files",
vec![
trap::Arg::Label(file_label),
trap::Arg::String("".to_string()),
],
);
}
file_label
}
impl TrapWriter {
/// Gets a label that will hold the unique ID of the passed string at import time.
/// This can be used for incrementally importable TRAP files -- use globally unique
/// strings to compute a unique ID for table tuples.
///
/// Note: You probably want to make sure that the key strings that you use are disjoint
/// for disjoint column types; the standard way of doing this is to prefix (or append)
/// the column type name to the ID. Thus, you might identify methods in Java by the
/// full ID "methods_com.method.package.DeclaringClass.method(argumentList)".
pub fn populate_empty_location(writer: &mut trap::Writer) {
let file_label = populate_empty_file(writer);
location(writer, file_label, 0, 0, 0, 0);
}
fn fresh_id(&mut self) -> Label {
let label = Label(self.counter);
self.counter += 1;
self.trap_output.push(TrapEntry::FreshId(label));
label
}
fn global_id(&mut self, key: &str) -> (Label, bool) {
if let Some(label) = self.global_keys.get(key) {
return (*label, false);
}
let label = Label(self.counter);
self.counter += 1;
self.global_keys.insert(key.to_owned(), label);
self.trap_output
.push(TrapEntry::MapLabelToKey(label, key.to_owned()));
(label, true)
}
fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
self.trap_output
.push(TrapEntry::GenericTuple(table_name.to_owned(), args))
}
fn populate_file(&mut self, absolute_path: &Path) -> Label {
let (file_label, fresh) = self.global_id(&full_id_for_file(absolute_path));
if fresh {
self.add_tuple(
"files",
vec![
Arg::Label(file_label),
Arg::String(normalize_path(absolute_path)),
],
);
self.populate_parent_folders(file_label, absolute_path.parent());
}
file_label
}
fn populate_empty_file(&mut self) -> Label {
let (file_label, fresh) = self.global_id("empty;sourcefile");
if fresh {
self.add_tuple(
"files",
vec![Arg::Label(file_label), Arg::String("".to_string())],
);
}
file_label
}
pub fn populate_empty_location(&mut self) {
let file_label = self.populate_empty_file();
self.location(file_label, 0, 0, 0, 0);
}
fn populate_parent_folders(&mut self, child_label: Label, path: Option<&Path>) {
let mut path = path;
let mut child_label = child_label;
loop {
match path {
None => break,
Some(folder) => {
let (folder_label, fresh) = self.global_id(&full_id_for_folder(folder));
self.add_tuple(
"containerparent",
vec![Arg::Label(folder_label), Arg::Label(child_label)],
pub fn populate_parent_folders(
writer: &mut trap::Writer,
child_label: trap::Label,
path: Option<&Path>,
) {
let mut path = path;
let mut child_label = child_label;
loop {
match path {
None => break,
Some(folder) => {
let (folder_label, fresh) =
writer.global_id(&trap::full_id_for_folder(&normalize_path(folder)));
writer.add_tuple(
"containerparent",
vec![
trap::Arg::Label(folder_label),
trap::Arg::Label(child_label),
],
);
if fresh {
writer.add_tuple(
"folders",
vec![
trap::Arg::Label(folder_label),
trap::Arg::String(normalize_path(folder)),
],
);
if fresh {
self.add_tuple(
"folders",
vec![
Arg::Label(folder_label),
Arg::String(normalize_path(folder)),
],
);
path = folder.parent();
child_label = folder_label;
} else {
break;
}
path = folder.parent();
child_label = folder_label;
} else {
break;
}
}
}
}
}
fn location(
&mut self,
file_label: Label,
start_line: usize,
start_column: usize,
end_line: usize,
end_column: usize,
) -> Label {
let (loc_label, fresh) = self.global_id(&format!(
"loc,{{{}}},{},{},{},{}",
file_label, start_line, start_column, end_line, end_column
));
if fresh {
self.add_tuple(
"locations_default",
vec![
Arg::Label(loc_label),
Arg::Label(file_label),
Arg::Int(start_line),
Arg::Int(start_column),
Arg::Int(end_line),
Arg::Int(end_column),
],
);
}
loc_label
}
fn comment(&mut self, text: String) {
self.trap_output.push(TrapEntry::Comment(text));
}
pub fn output(self, writer: &mut dyn Write) -> std::io::Result<()> {
write!(writer, "{}", Program(self.trap_output))
fn location(
writer: &mut trap::Writer,
file_label: trap::Label,
start_line: usize,
start_column: usize,
end_line: usize,
end_column: usize,
) -> trap::Label {
let (loc_label, fresh) = writer.global_id(&format!(
"loc,{{{}}},{},{},{},{}",
file_label, start_line, start_column, end_line, end_column
));
if fresh {
writer.add_tuple(
"locations_default",
vec![
trap::Arg::Label(loc_label),
trap::Arg::Label(file_label),
trap::Arg::Int(start_line),
trap::Arg::Int(start_column),
trap::Arg::Int(end_line),
trap::Arg::Int(end_column),
],
);
}
loc_label
}
/// Extracts the source file at `path`, which is assumed to be canonicalized.
@@ -163,71 +114,43 @@ pub fn extract(
language: Language,
language_prefix: &str,
schema: &NodeTypeMap,
trap_writer: &mut TrapWriter,
trap_writer: &mut trap::Writer,
path: &Path,
source: &[u8],
ranges: &[Range],
) -> std::io::Result<()> {
let path_str = format!("{}", path.display());
let span = span!(
Level::TRACE,
"extract",
file = %path.display()
file = %path_str
);
let _enter = span.enter();
info!("extracting: {}", path.display());
info!("extracting: {}", path_str);
let mut parser = Parser::new();
parser.set_language(language).unwrap();
parser.set_included_ranges(ranges).unwrap();
let tree = parser.parse(&source, None).expect("Failed to parse file");
trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display()));
let file_label = &trap_writer.populate_file(path);
let mut visitor = Visitor {
trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str));
let file_label = populate_file(trap_writer, path);
let mut visitor = Visitor::new(
source,
trap_writer,
// TODO: should we handle path strings that are not valid UTF8 better?
path: format!("{}", path.display()),
file_label: *file_label,
toplevel_child_counter: 0,
stack: Vec::new(),
&path_str,
file_label,
language_prefix,
schema,
};
);
traverse(&tree, &mut visitor);
parser.reset();
Ok(())
}
/// Escapes a string for use in a TRAP key, by replacing special characters with
/// HTML entities.
fn escape_key<'a, S: Into<Cow<'a, str>>>(key: S) -> Cow<'a, str> {
fn needs_escaping(c: char) -> bool {
matches!(c, '&' | '{' | '}' | '"' | '@' | '#')
}
let key = key.into();
if key.contains(needs_escaping) {
let mut escaped = String::with_capacity(2 * key.len());
for c in key.chars() {
match c {
'&' => escaped.push_str("&amp;"),
'{' => escaped.push_str("&lbrace;"),
'}' => escaped.push_str("&rbrace;"),
'"' => escaped.push_str("&quot;"),
'@' => escaped.push_str("&commat;"),
'#' => escaped.push_str("&num;"),
_ => escaped.push(c),
}
}
Cow::Owned(escaped)
} else {
key
}
}
/// Normalizes the path according to the common CodeQL specification. Assumes that
/// `path` has already been canonicalized using `std::fs::canonicalize`.
fn normalize_path(path: &Path) -> String {
@@ -267,34 +190,28 @@ fn normalize_path(path: &Path) -> String {
}
}
fn full_id_for_file(path: &Path) -> String {
format!("{};sourcefile", escape_key(&normalize_path(path)))
}
fn full_id_for_folder(path: &Path) -> String {
format!("{};folder", escape_key(&normalize_path(path)))
}
struct ChildNode {
field_name: Option<&'static str>,
label: Label,
label: trap::Label,
type_name: TypeName,
}
struct Visitor<'a> {
/// The file path of the source code (as string)
path: String,
path: &'a str,
/// The label to use whenever we need to refer to the `@file` entity of this
/// source file.
file_label: Label,
file_label: trap::Label,
/// The source code as a UTF-8 byte array
source: &'a [u8],
/// A TrapWriter to accumulate trap entries
trap_writer: &'a mut TrapWriter,
/// A trap::Writer to accumulate trap entries
trap_writer: &'a mut trap::Writer,
/// A counter for top-level child nodes
toplevel_child_counter: usize,
/// Language prefix
language_prefix: &'a str,
/// Language-specific name of the AST info table
ast_node_info_table_name: String,
/// Language-specific name of the tokeninfo table
tokeninfo_table_name: String,
/// A lookup table from type name to node types
schema: &'a NodeTypeMap,
/// A stack for gathering information from child nodes. Whenever a node is
@@ -303,27 +220,48 @@ struct Visitor<'a> {
/// node the list containing the child data is popped from the stack and
/// matched against the dbscheme for the node. If the expectations are met
/// the corresponding row definitions are added to the trap_output.
stack: Vec<(Label, usize, Vec<ChildNode>)>,
stack: Vec<(trap::Label, usize, Vec<ChildNode>)>,
}
impl Visitor<'_> {
impl<'a> Visitor<'a> {
fn new(
source: &'a [u8],
trap_writer: &'a mut trap::Writer,
path: &'a str,
file_label: trap::Label,
language_prefix: &str,
schema: &'a NodeTypeMap,
) -> Visitor<'a> {
Visitor {
path,
file_label,
source,
trap_writer,
toplevel_child_counter: 0,
ast_node_info_table_name: format!("{}_ast_node_info", language_prefix),
tokeninfo_table_name: format!("{}_tokeninfo", language_prefix),
schema,
stack: Vec::new(),
}
}
fn record_parse_error(
&mut self,
error_message: String,
full_error_message: String,
loc: Label,
loc: trap::Label,
) {
error!("{}", full_error_message);
let id = self.trap_writer.fresh_id();
self.trap_writer.add_tuple(
"diagnostics",
vec![
Arg::Label(id),
Arg::Int(40), // severity 40 = error
Arg::String("parse_error".to_string()),
Arg::String(error_message),
Arg::String(full_error_message),
Arg::Label(loc),
trap::Arg::Label(id),
trap::Arg::Int(40), // severity 40 = error
trap::Arg::String("parse_error".to_string()),
trap::Arg::String(error_message),
trap::Arg::String(full_error_message),
trap::Arg::Label(loc),
],
);
}
@@ -335,7 +273,8 @@ impl Visitor<'_> {
node: Node,
) {
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
let loc = self.trap_writer.location(
let loc = location(
self.trap_writer,
self.file_label,
start_line,
start_column,
@@ -374,7 +313,8 @@ impl Visitor<'_> {
}
let (id, _, child_nodes) = self.stack.pop().expect("Visitor: empty stack");
let (start_line, start_column, end_line, end_column) = location_for(self.source, node);
let loc = self.trap_writer.location(
let loc = location(
self.trap_writer,
self.file_label,
start_line,
start_column,
@@ -402,19 +342,19 @@ impl Visitor<'_> {
match &table.kind {
EntryKind::Token { kind_id, .. } => {
self.trap_writer.add_tuple(
&format!("{}_ast_node_info", self.language_prefix),
&self.ast_node_info_table_name,
vec![
Arg::Label(id),
Arg::Label(parent_id),
Arg::Int(parent_index),
Arg::Label(loc),
trap::Arg::Label(id),
trap::Arg::Label(parent_id),
trap::Arg::Int(parent_index),
trap::Arg::Label(loc),
],
);
self.trap_writer.add_tuple(
&format!("{}_tokeninfo", self.language_prefix),
&self.tokeninfo_table_name,
vec![
Arg::Label(id),
Arg::Int(*kind_id),
trap::Arg::Label(id),
trap::Arg::Int(*kind_id),
sliced_source_arg(self.source, node),
],
);
@@ -425,15 +365,15 @@ impl Visitor<'_> {
} => {
if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) {
self.trap_writer.add_tuple(
&format!("{}_ast_node_info", self.language_prefix),
&self.ast_node_info_table_name,
vec![
Arg::Label(id),
Arg::Label(parent_id),
Arg::Int(parent_index),
Arg::Label(loc),
trap::Arg::Label(id),
trap::Arg::Label(parent_id),
trap::Arg::Int(parent_index),
trap::Arg::Label(loc),
],
);
let mut all_args = vec![Arg::Label(id)];
let mut all_args = vec![trap::Arg::Label(id)];
all_args.extend(args);
self.trap_writer.add_tuple(table_name, all_args);
}
@@ -472,9 +412,9 @@ impl Visitor<'_> {
node: &Node,
fields: &[Field],
child_nodes: &[ChildNode],
parent_id: Label,
) -> Option<Vec<Arg>> {
let mut map: Map<&Option<String>, (&Field, Vec<Arg>)> = Map::new();
parent_id: trap::Label,
) -> Option<Vec<trap::Arg>> {
let mut map: Map<&Option<String>, (&Field, Vec<trap::Arg>)> = Map::new();
for field in fields {
map.insert(&field.name, (field, Vec::new()));
}
@@ -488,9 +428,9 @@ impl Visitor<'_> {
{
// We can safely unwrap because type_matches checks the key is in the map.
let (int_value, _) = int_mapping.get(&child_node.type_name.kind).unwrap();
values.push(Arg::Int(*int_value));
values.push(trap::Arg::Int(*int_value));
} else {
values.push(Arg::Label(child_node.label));
values.push(trap::Arg::Label(child_node.label));
}
} else if field.name.is_some() {
let error_message = format!(
@@ -569,9 +509,9 @@ impl Visitor<'_> {
);
break;
}
let mut args = vec![Arg::Label(parent_id)];
let mut args = vec![trap::Arg::Label(parent_id)];
if *has_index {
args.push(Arg::Int(index))
args.push(trap::Arg::Int(index))
}
args.push(child_value.clone());
self.trap_writer.add_tuple(table_name, args);
@@ -625,9 +565,9 @@ impl Visitor<'_> {
}
// Emit a slice of a source file as an Arg.
fn sliced_source_arg(source: &[u8], n: Node) -> Arg {
fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg {
let range = n.byte_range();
Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned())
trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned())
}
// Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated.
@@ -699,59 +639,6 @@ fn traverse(tree: &Tree, visitor: &mut Visitor) {
}
}
pub struct Program(Vec<TrapEntry>);
impl fmt::Display for Program {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut text = String::new();
for trap_entry in &self.0 {
text.push_str(&format!("{}\n", trap_entry));
}
write!(f, "{}", text)
}
}
enum TrapEntry {
/// Maps the label to a fresh id, e.g. `#123=*`.
FreshId(Label),
/// Maps the label to a key, e.g. `#7=@"foo"`.
MapLabelToKey(Label, String),
/// foo_bar(arg*)
GenericTuple(String, Vec<Arg>),
Comment(String),
}
impl fmt::Display for TrapEntry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TrapEntry::FreshId(label) => write!(f, "{}=*", label),
TrapEntry::MapLabelToKey(label, key) => {
write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\""))
}
TrapEntry::GenericTuple(name, args) => {
write!(f, "{}(", name)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ",")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
TrapEntry::Comment(line) => write!(f, "// {}", line),
}
}
}
#[derive(Debug, Copy, Clone)]
// Identifiers of the form #0, #1...
struct Label(u32);
impl fmt::Display for Label {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "#{:x}", self.0)
}
}
// Numeric indices.
#[derive(Debug, Copy, Clone)]
struct Index(usize);
@@ -761,69 +648,3 @@ impl fmt::Display for Index {
write!(f, "{}", self.0)
}
}
// Some untyped argument to a TrapEntry.
#[derive(Debug, Clone)]
enum Arg {
Label(Label),
Int(usize),
String(String),
}
const MAX_STRLEN: usize = 1048576;
impl fmt::Display for Arg {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Arg::Label(x) => write!(f, "{}", x),
Arg::Int(x) => write!(f, "{}", x),
Arg::String(x) => write!(
f,
"\"{}\"",
limit_string(x, MAX_STRLEN).replace("\"", "\"\"")
),
}
}
}
/// Limit the length (in bytes) of a string. If the string's length in bytes is
/// less than or equal to the limit then the entire string is returned. Otherwise
/// the string is sliced at the provided limit. If there is a multi-byte character
/// at the limit then the returned slice will be slightly shorter than the limit to
/// avoid splitting that multi-byte character.
fn limit_string(string: &str, max_size: usize) -> &str {
if string.len() <= max_size {
return string;
}
let p = string.as_bytes();
let mut index = max_size;
// We want to clip the string at [max_size]; however, the character at that position
// may span several bytes. We need to find the first byte of the character. In UTF-8
// encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte.
// Therefore we decrement the index as long as there are bytes matching this pattern.
// This ensures we cut the string at the border between one character and another.
while index > 0 && (p[index] & 0b11000000) == 0b10000000 {
index -= 1;
}
&string[0..index]
}
#[test]
fn limit_string_test() {
assert_eq!("hello", limit_string(&"hello world".to_owned(), 5));
assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6));
assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5));
}
#[test]
fn escape_key_test() {
assert_eq!("foo!", escape_key("foo!"));
assert_eq!("foo&lbrace;&rbrace;", escape_key("foo{}"));
assert_eq!("&lbrace;&rbrace;", escape_key("{}"));
assert_eq!("", escape_key(""));
assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb"));
assert_eq!(
"/path/to/foo&amp;&lbrace;&rbrace;&quot;&commat;&num;.rb",
escape_key("/path/to/foo&{}\"@#.rb")
);
}

View File

@@ -1,4 +1,5 @@
mod extractor;
mod trap;
#[macro_use]
extern crate lazy_static;
@@ -6,50 +7,13 @@ extern crate num_cpus;
use clap::arg;
use encoding::{self};
use flate2::write::GzEncoder;
use rayon::prelude::*;
use std::borrow::Cow;
use std::fs;
use std::io::{BufRead, BufWriter};
use std::io::BufRead;
use std::path::{Path, PathBuf};
use tree_sitter::{Language, Parser, Range};
enum TrapCompression {
None,
Gzip,
}
impl TrapCompression {
fn from_env() -> TrapCompression {
match std::env::var("CODEQL_RUBY_TRAP_COMPRESSION") {
Ok(method) => match TrapCompression::from_string(&method) {
Some(c) => c,
None => {
tracing::error!("Unknown compression method '{}'; using gzip.", &method);
TrapCompression::Gzip
}
},
// Default compression method if the env var isn't set:
Err(_) => TrapCompression::Gzip,
}
}
fn from_string(s: &str) -> Option<TrapCompression> {
match s.to_lowercase().as_ref() {
"none" => Some(TrapCompression::None),
"gzip" => Some(TrapCompression::Gzip),
_ => None,
}
}
fn extension(&self) -> &str {
match self {
TrapCompression::None => "trap",
TrapCompression::Gzip => "trap.gz",
}
}
}
/**
* Gets the number of threads the extractor should use, by reading the
* CODEQL_THREADS environment variable and using it as described in the
@@ -141,7 +105,7 @@ fn main() -> std::io::Result<()> {
.value_of("output-dir")
.expect("missing --output-dir");
let trap_dir = PathBuf::from(trap_dir);
let trap_compression = TrapCompression::from_env();
let trap_compression = trap::Compression::from_env("CODEQL_RUBY_TRAP_COMPRESSION");
let file_list = matches.value_of("file-list").expect("missing --file-list");
let file_list = fs::File::open(file_list)?;
@@ -165,7 +129,7 @@ fn main() -> std::io::Result<()> {
let mut source = std::fs::read(&path)?;
let mut needs_conversion = false;
let code_ranges;
let mut trap_writer = extractor::new_trap_writer();
let mut trap_writer = trap::Writer::new();
if path.extension().map_or(false, |x| x == "erb") {
tracing::info!("scanning: {}", path.display());
extractor::extract(
@@ -246,33 +210,25 @@ fn main() -> std::io::Result<()> {
} else {
std::fs::copy(&path, &src_archive_file)?;
}
write_trap(&trap_dir, path, trap_writer, &trap_compression)
write_trap(&trap_dir, path, &trap_writer, trap_compression)
})
.expect("failed to extract files");
let path = PathBuf::from("extras");
let mut trap_writer = extractor::new_trap_writer();
trap_writer.populate_empty_location();
write_trap(&trap_dir, path, trap_writer, &trap_compression)
let mut trap_writer = trap::Writer::new();
extractor::populate_empty_location(&mut trap_writer);
write_trap(&trap_dir, path, &trap_writer, trap_compression)
}
fn write_trap(
trap_dir: &Path,
path: PathBuf,
trap_writer: extractor::TrapWriter,
trap_compression: &TrapCompression,
trap_writer: &trap::Writer,
trap_compression: trap::Compression,
) -> std::io::Result<()> {
let trap_file = path_for(trap_dir, &path, trap_compression.extension());
std::fs::create_dir_all(&trap_file.parent().unwrap())?;
let trap_file = std::fs::File::create(&trap_file)?;
let mut trap_file = BufWriter::new(trap_file);
match trap_compression {
TrapCompression::None => trap_writer.output(&mut trap_file),
TrapCompression::Gzip => {
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
trap_writer.output(&mut compressed_writer)
}
}
trap_writer.write_to_file(&trap_file, trap_compression)
}
fn scan_erb(

ruby/extractor/src/trap.rs (new file, 272 lines)
View File

@@ -0,0 +1,272 @@
use std::borrow::Cow;
use std::fmt;
use std::io::{BufWriter, Write};
use std::path::Path;
use flate2::write::GzEncoder;
pub struct Writer {
/// The accumulated trap entries
trap_output: Vec<Entry>,
/// A counter for generating fresh labels
counter: u32,
/// cache of global keys
global_keys: std::collections::HashMap<String, Label>,
}
impl Writer {
pub fn new() -> Writer {
Writer {
counter: 0,
trap_output: Vec::new(),
global_keys: std::collections::HashMap::new(),
}
}
pub fn fresh_id(&mut self) -> Label {
let label = Label(self.counter);
self.counter += 1;
self.trap_output.push(Entry::FreshId(label));
label
}
/// Gets a label that will hold the unique ID of the passed string at import time.
/// This can be used for incrementally importable TRAP files -- use globally unique
/// strings to compute a unique ID for table tuples.
///
/// Note: You probably want to make sure that the key strings that you use are disjoint
/// for disjoint column types; the standard way of doing this is to prefix (or append)
/// the column type name to the ID. Thus, you might identify methods in Java by the
/// full ID "methods_com.method.package.DeclaringClass.method(argumentList)".
pub fn global_id(&mut self, key: &str) -> (Label, bool) {
if let Some(label) = self.global_keys.get(key) {
return (*label, false);
}
let label = Label(self.counter);
self.counter += 1;
self.global_keys.insert(key.to_owned(), label);
self.trap_output
.push(Entry::MapLabelToKey(label, key.to_owned()));
(label, true)
}
pub fn add_tuple(&mut self, table_name: &str, args: Vec<Arg>) {
self.trap_output
.push(Entry::GenericTuple(table_name.to_owned(), args))
}
pub fn comment(&mut self, text: String) {
self.trap_output.push(Entry::Comment(text));
}
pub fn write_to_file(&self, path: &Path, compression: Compression) -> std::io::Result<()> {
let trap_file = std::fs::File::create(path)?;
let mut trap_file = BufWriter::new(trap_file);
match compression {
Compression::None => self.write_trap_entries(&mut trap_file),
Compression::Gzip => {
let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast());
self.write_trap_entries(&mut compressed_writer)
}
}
}
fn write_trap_entries<W: Write>(&self, file: &mut W) -> std::io::Result<()> {
for trap_entry in &self.trap_output {
writeln!(file, "{}", trap_entry)?;
}
std::io::Result::Ok(())
}
}
pub enum Entry {
/// Maps the label to a fresh id, e.g. `#123=*`.
FreshId(Label),
/// Maps the label to a key, e.g. `#7=@"foo"`.
MapLabelToKey(Label, String),
/// foo_bar(arg*)
GenericTuple(String, Vec<Arg>),
Comment(String),
}
impl fmt::Display for Entry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Entry::FreshId(label) => write!(f, "{}=*", label),
Entry::MapLabelToKey(label, key) => {
write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\""))
}
Entry::GenericTuple(name, args) => {
write!(f, "{}(", name)?;
for (index, arg) in args.iter().enumerate() {
if index > 0 {
write!(f, ",")?;
}
write!(f, "{}", arg)?;
}
write!(f, ")")
}
Entry::Comment(line) => write!(f, "// {}", line),
}
}
}
#[derive(Debug, Copy, Clone)]
// Identifiers of the form #0, #1...
pub struct Label(u32);
impl fmt::Display for Label {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "#{:x}", self.0)
}
}
// Some untyped argument to a TrapEntry.
#[derive(Debug, Clone)]
pub enum Arg {
Label(Label),
Int(usize),
String(String),
}
const MAX_STRLEN: usize = 1048576;
impl fmt::Display for Arg {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Arg::Label(x) => write!(f, "{}", x),
Arg::Int(x) => write!(f, "{}", x),
Arg::String(x) => write!(
f,
"\"{}\"",
limit_string(x, MAX_STRLEN).replace("\"", "\"\"")
),
}
}
}
pub struct Program(Vec<Entry>);
impl fmt::Display for Program {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut text = String::new();
for trap_entry in &self.0 {
text.push_str(&format!("{}\n", trap_entry));
}
write!(f, "{}", text)
}
}
pub fn full_id_for_file(normalized_path: &str) -> String {
format!("{};sourcefile", escape_key(normalized_path))
}
pub fn full_id_for_folder(normalized_path: &str) -> String {
format!("{};folder", escape_key(normalized_path))
}
/// Escapes a string for use in a TRAP key, by replacing special characters with
/// HTML entities.
fn escape_key<'a, S: Into<Cow<'a, str>>>(key: S) -> Cow<'a, str> {
fn needs_escaping(c: char) -> bool {
matches!(c, '&' | '{' | '}' | '"' | '@' | '#')
}
let key = key.into();
if key.contains(needs_escaping) {
let mut escaped = String::with_capacity(2 * key.len());
for c in key.chars() {
match c {
'&' => escaped.push_str("&amp;"),
'{' => escaped.push_str("&lbrace;"),
'}' => escaped.push_str("&rbrace;"),
'"' => escaped.push_str("&quot;"),
'@' => escaped.push_str("&commat;"),
'#' => escaped.push_str("&num;"),
_ => escaped.push(c),
}
}
Cow::Owned(escaped)
} else {
key
}
}
/// Limit the length (in bytes) of a string. If the string's length in bytes is
/// less than or equal to the limit then the entire string is returned. Otherwise
/// the string is sliced at the provided limit. If there is a multi-byte character
/// at the limit then the returned slice will be slightly shorter than the limit to
/// avoid splitting that multi-byte character.
fn limit_string(string: &str, max_size: usize) -> &str {
if string.len() <= max_size {
return string;
}
let p = string.as_bytes();
let mut index = max_size;
// We want to clip the string at [max_size]; however, the character at that position
// may span several bytes. We need to find the first byte of the character. In UTF-8
// encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte.
// Therefore we decrement the index as long as there are bytes matching this pattern.
// This ensures we cut the string at the border between one character and another.
while index > 0 && (p[index] & 0b11000000) == 0b10000000 {
index -= 1;
}
&string[0..index]
}
#[derive(Clone, Copy)]
pub enum Compression {
None,
Gzip,
}
impl Compression {
pub fn from_env(var_name: &str) -> Compression {
match std::env::var(var_name) {
Ok(method) => match Compression::from_string(&method) {
Some(c) => c,
None => {
tracing::error!("Unknown compression method '{}'; using gzip.", &method);
Compression::Gzip
}
},
// Default compression method if the env var isn't set:
Err(_) => Compression::Gzip,
}
}
pub fn from_string(s: &str) -> Option<Compression> {
match s.to_lowercase().as_ref() {
"none" => Some(Compression::None),
"gzip" => Some(Compression::Gzip),
_ => None,
}
}
pub fn extension(&self) -> &str {
match self {
Compression::None => "trap",
Compression::Gzip => "trap.gz",
}
}
}
#[test]
fn limit_string_test() {
assert_eq!("hello", limit_string(&"hello world".to_owned(), 5));
assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6));
assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5));
}
#[test]
fn escape_key_test() {
assert_eq!("foo!", escape_key("foo!"));
assert_eq!("foo&lbrace;&rbrace;", escape_key("foo{}"));
assert_eq!("&lbrace;&rbrace;", escape_key("{}"));
assert_eq!("", escape_key(""));
assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb"));
assert_eq!(
"/path/to/foo&amp;&lbrace;&rbrace;&quot;&commat;&num;.rb",
escape_key("/path/to/foo&{}\"@#.rb")
);
}
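To tie the pieces of the new trap module together, here is a minimal usage sketch in the spirit of the calls made from extractor.rs and main.rs; the function name and the output path are illustrative assumptions, not part of the commit, and the snippet presumes it lives in the same crate as `mod trap;`:

use std::path::Path;

// Hypothetical driver showing the Writer lifecycle: allocate labels, emit
// tuples, then serialize with the chosen compression.
fn write_example_trap() -> std::io::Result<()> {
    let mut writer = trap::Writer::new();
    writer.comment("example TRAP file".to_string());

    // Global keys are cached, so the same key always resolves to the same label;
    // `fresh` is true only the first time a key is seen.
    let (file_label, fresh) = writer.global_id(&trap::full_id_for_file("/src/foo.rb"));
    if fresh {
        writer.add_tuple(
            "files",
            vec![
                trap::Arg::Label(file_label),
                trap::Arg::String("/src/foo.rb".to_string()),
            ],
        );
    }

    // In the extractor the compression comes from CODEQL_RUBY_TRAP_COMPRESSION;
    // Compression::None keeps the output human-readable here.
    writer.write_to_file(Path::new("example.trap"), trap::Compression::None)
}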

View File

@@ -12,4 +12,4 @@ node-types = { path = "../node-types" }
tracing = "0.1"
tracing-subscriber = { version = "0.3.3", features = ["env-filter"] }
tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "1a538da253d73f896b9f6c0c7d79cda58791ac5c" }
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "5b305c3cd32db10494cedd2743de6bbe32f1a573" }
tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "e75d04404c9dd71ad68850d5c672b226d5e694f3" }

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
- Calls to `ActiveRecord::Relation#annotate` are now recognized as `SqlExecution`s, so they are considered sinks for queries like rb/sql-injection.

View File

@@ -133,6 +133,11 @@ private Expr sqlFragmentArgument(MethodCall call) {
or
methodName = "reload" and
result = call.getKeywordArgument("lock")
or
// Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to
// SQLi if user-supplied input is passed in as an argument.
methodName = "annotate" and
result = call.getArgument(_)
)
)
}

View File

@@ -2,6 +2,7 @@ actionControllerControllerClasses
| ActiveRecord.rb:23:1:39:3 | FooController |
| ActiveRecord.rb:41:1:64:3 | BarController |
| ActiveRecord.rb:66:1:70:3 | BazController |
| ActiveRecord.rb:72:1:80:3 | AnnotatedController |
| app/controllers/comments_controller.rb:1:1:7:3 | CommentsController |
| app/controllers/foo/bars_controller.rb:3:1:39:3 | BarsController |
| app/controllers/photos_controller.rb:1:1:4:3 | PhotosController |
@@ -12,6 +13,8 @@ actionControllerActionMethods
| ActiveRecord.rb:42:3:47:5 | some_other_request_handler |
| ActiveRecord.rb:49:3:63:5 | safe_paths |
| ActiveRecord.rb:67:3:69:5 | yet_another_handler |
| ActiveRecord.rb:73:3:75:5 | index |
| ActiveRecord.rb:77:3:79:5 | unsafe_action |
| app/controllers/comments_controller.rb:2:3:3:5 | index |
| app/controllers/comments_controller.rb:5:3:6:5 | show |
| app/controllers/foo/bars_controller.rb:5:3:7:5 | index |
@@ -38,6 +41,7 @@ paramsCalls
| ActiveRecord.rb:59:12:59:17 | call to params |
| ActiveRecord.rb:62:15:62:20 | call to params |
| ActiveRecord.rb:68:21:68:26 | call to params |
| ActiveRecord.rb:78:59:78:64 | call to params |
| app/controllers/foo/bars_controller.rb:13:21:13:26 | call to params |
| app/controllers/foo/bars_controller.rb:14:10:14:15 | call to params |
| app/controllers/foo/bars_controller.rb:21:21:21:26 | call to params |
@@ -57,6 +61,7 @@ paramsSources
| ActiveRecord.rb:59:12:59:17 | call to params |
| ActiveRecord.rb:62:15:62:20 | call to params |
| ActiveRecord.rb:68:21:68:26 | call to params |
| ActiveRecord.rb:78:59:78:64 | call to params |
| app/controllers/foo/bars_controller.rb:13:21:13:26 | call to params |
| app/controllers/foo/bars_controller.rb:14:10:14:15 | call to params |
| app/controllers/foo/bars_controller.rb:21:21:21:26 | call to params |

View File

@@ -22,6 +22,7 @@ activeRecordSqlExecutionRanges
| ActiveRecord.rb:46:20:46:32 | ... + ... |
| ActiveRecord.rb:52:16:52:28 | "name #{...}" |
| ActiveRecord.rb:56:20:56:39 | "username = #{...}" |
| ActiveRecord.rb:78:27:78:76 | "this is an unsafe annotation:..." |
activeRecordModelClassMethodCalls
| ActiveRecord.rb:2:3:2:17 | call to has_many |
| ActiveRecord.rb:6:3:6:24 | call to belongs_to |
@@ -44,6 +45,8 @@ activeRecordModelClassMethodCalls
| ActiveRecord.rb:60:5:60:33 | call to find_by |
| ActiveRecord.rb:62:5:62:34 | call to find |
| ActiveRecord.rb:68:5:68:45 | call to delete_by |
| ActiveRecord.rb:74:13:74:54 | call to annotate |
| ActiveRecord.rb:78:13:78:77 | call to annotate |
potentiallyUnsafeSqlExecutingMethodCall
| ActiveRecord.rb:9:5:9:68 | call to find |
| ActiveRecord.rb:19:5:19:25 | call to destroy_by |
@@ -55,6 +58,7 @@ potentiallyUnsafeSqlExecutingMethodCall
| ActiveRecord.rb:46:5:46:33 | call to delete_by |
| ActiveRecord.rb:52:5:52:29 | call to order |
| ActiveRecord.rb:56:7:56:40 | call to find_by |
| ActiveRecord.rb:78:13:78:77 | call to annotate |
activeRecordModelInstantiations
| ActiveRecord.rb:9:5:9:68 | call to find | ActiveRecord.rb:5:1:15:3 | User |
| ActiveRecord.rb:13:5:13:40 | call to find_by | ActiveRecord.rb:1:1:3:3 | UserGroup |

View File

@@ -68,3 +68,13 @@ class BazController < BarController
Admin.delete_by(params[:admin_condition])
end
end
class AnnotatedController < ActionController::Base
def index
users = User.annotate("this is a safe annotation")
end
def unsafe_action
users = User.annotate("this is an unsafe annotation:#{params[:comment]}")
end
end

View File

@@ -137,3 +137,17 @@ class BazController < BarController
Admin.delete_by(params[:admin_condition])
end
end
class AnnotatedController < ActionController::Base
def index
name = params[:user_name]
# GOOD: string literal arguments not controlled by the user are safe for annotations
users = User.annotate("this is a safe annotation").find_by(user_name: name)
end
def unsafe_action
name = params[:user_name]
# BAD: user input passed into annotations is vulnerable to SQLi
users = User.annotate("this is an unsafe annotation:#{params[:comment]}").find_by(user_name: name)
end
end

View File

@@ -31,6 +31,8 @@ edges
| ActiveRecordInjection.rb:99:11:99:17 | ...[...] : | ActiveRecordInjection.rb:104:20:104:32 | ... + ... |
| ActiveRecordInjection.rb:137:21:137:26 | call to params : | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : |
| ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | ActiveRecordInjection.rb:20:22:20:30 | condition : |
| ActiveRecordInjection.rb:151:59:151:64 | call to params : | ActiveRecordInjection.rb:151:59:151:74 | ...[...] : |
| ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." |
nodes
| ActiveRecordInjection.rb:8:25:8:28 | name : | semmle.label | name : |
| ActiveRecordInjection.rb:8:31:8:34 | pass : | semmle.label | pass : |
@@ -80,6 +82,9 @@ nodes
| ActiveRecordInjection.rb:104:20:104:32 | ... + ... | semmle.label | ... + ... |
| ActiveRecordInjection.rb:137:21:137:26 | call to params : | semmle.label | call to params : |
| ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | semmle.label | ...[...] : |
| ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | semmle.label | "this is an unsafe annotation:..." |
| ActiveRecordInjection.rb:151:59:151:64 | call to params : | semmle.label | call to params : |
| ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | semmle.label | ...[...] : |
subpaths
#select
| ActiveRecordInjection.rb:10:33:10:67 | "name='#{...}' and pass='#{...}'" | ActiveRecordInjection.rb:70:23:70:28 | call to params : | ActiveRecordInjection.rb:10:33:10:67 | "name='#{...}' and pass='#{...}'" | This SQL query depends on $@. | ActiveRecordInjection.rb:70:23:70:28 | call to params | a user-provided value |
@@ -99,3 +104,4 @@ subpaths
| ActiveRecordInjection.rb:88:18:88:35 | ...[...] | ActiveRecordInjection.rb:88:18:88:23 | call to params : | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | This SQL query depends on $@. | ActiveRecordInjection.rb:88:18:88:23 | call to params | a user-provided value |
| ActiveRecordInjection.rb:92:21:92:35 | ...[...] | ActiveRecordInjection.rb:92:21:92:26 | call to params : | ActiveRecordInjection.rb:92:21:92:35 | ...[...] | This SQL query depends on $@. | ActiveRecordInjection.rb:92:21:92:26 | call to params | a user-provided value |
| ActiveRecordInjection.rb:104:20:104:32 | ... + ... | ActiveRecordInjection.rb:98:10:98:15 | call to params : | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | This SQL query depends on $@. | ActiveRecordInjection.rb:98:10:98:15 | call to params | a user-provided value |
| ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | ActiveRecordInjection.rb:151:59:151:64 | call to params : | ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | This SQL query depends on $@. | ActiveRecordInjection.rb:151:59:151:64 | call to params | a user-provided value |

View File

@@ -41,88 +41,80 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration {
}
override predicate isSink(DataFlow::Node node, string flowstate) {
// arguments to method calls...
exists(
string className, string methodName, string paramName, ClassDecl c, AbstractFunctionDecl f,
CallExpr call, int arg
AbstractFunctionDecl funcDecl, CallExpr call, string funcName, string paramName, int arg
|
(
// `NSRange.init`
className = "NSRange" and
methodName = "init(location:length:)" and
paramName = ["location", "length"]
// arguments to method calls...
exists(string className, ClassDecl c |
(
// `NSRange.init`
className = "NSRange" and
funcName = "init(location:length:)" and
paramName = ["location", "length"]
or
// `NSString.character`
className = ["NSString", "NSMutableString"] and
funcName = "character(at:)" and
paramName = "at"
or
// `NSString.substring`
className = ["NSString", "NSMutableString"] and
funcName = "substring(from:)" and
paramName = "from"
or
// `NSString.substring`
className = ["NSString", "NSMutableString"] and
funcName = "substring(to:)" and
paramName = "to"
or
// `NSMutableString.insert`
className = "NSMutableString" and
funcName = "insert(_:at:)" and
paramName = "at"
) and
c.getName() = className and
c.getAMember() = funcDecl and
call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and
flowstate = "String" // `String` length flowing into `NSString`
)
or
// `NSString.character`
className = ["NSString", "NSMutableString"] and
methodName = "character(at:)" and
paramName = "at"
// arguments to function calls...
// `NSMakeRange`
funcName = "NSMakeRange(_:_:)" and
paramName = ["loc", "len"] and
call.getStaticTarget() = funcDecl and
flowstate = "String" // `String` length flowing into `NSString`
or
// `NSString.character`
className = ["NSString", "NSMutableString"] and
methodName = "substring(from:)" and
paramName = "from"
or
// `NSString.character`
className = ["NSString", "NSMutableString"] and
methodName = "substring(to:)" and
paramName = "to"
or
// `NSMutableString.insert`
className = "NSMutableString" and
methodName = "insert(_:at:)" and
paramName = "at"
// arguments to method calls...
(
// `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast`
funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and
paramName = "k"
or
// `String.prefix`, `String.suffix`
funcName = ["prefix(_:)", "suffix(_:)"] and
paramName = "maxLength"
or
// `String.Index.init`
funcName = "init(encodedOffset:)" and
paramName = "offset"
or
// `String.index`
funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and
paramName = "n"
or
// `String.formIndex`
funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and
paramName = "distance"
) and
call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and
flowstate = "NSString" // `NSString` length flowing into `String`
) and
c.getName() = className and
c.getAMember() = f and // TODO: will this even work if its defined in a parent class?
call.getFunction().(ApplyExpr).getStaticTarget() = f and
f.getName() = methodName and
f.getParam(pragma[only_bind_into](arg)).getName() = paramName and
call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and
flowstate = "String" // `String` length flowing into `NSString`
)
or
// arguments to function calls...
exists(string funcName, string paramName, CallExpr call, int arg |
// `NSMakeRange`
funcName = "NSMakeRange(_:_:)" and
paramName = ["loc", "len"] and
call.getStaticTarget().getName() = funcName and
call.getStaticTarget().getParam(pragma[only_bind_into](arg)).getName() = paramName and
call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and
flowstate = "String" // `String` length flowing into `NSString`
)
or
// arguments to function calls...
exists(string funcName, string paramName, CallExpr call, int arg |
(
// `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast`
funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and
paramName = "k"
or
// `String.prefix`, `String.suffix`
funcName = ["prefix(_:)", "suffix(_:)"] and
paramName = "maxLength"
or
// `String.Index.init`
funcName = "init(encodedOffset:)" and
paramName = "offset"
or
// `String.index`
funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and
paramName = "n"
or
// `String.formIndex`
funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and
paramName = "distance"
) and
call.getFunction().(ApplyExpr).getStaticTarget().getName() = funcName and
call.getFunction()
.(ApplyExpr)
.getStaticTarget()
.getParam(pragma[only_bind_into](arg))
.getName() = paramName and
call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and
flowstate = "NSString" // `NSString` length flowing into `String`
// match up `funcName`, `paramName`, `arg`, `node`.
funcDecl.getName() = funcName and
funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and
call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr()
)
}

View File

@@ -1,4 +1,5 @@
edges
| StringLengthConflation2.swift:37:34:37:36 | .count : | StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... |
| StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... |
| StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... |
| StringLengthConflation.swift:93:28:93:31 | .length : | StringLengthConflation.swift:93:28:93:40 | ... call to -(_:_:) ... |
@@ -15,6 +16,8 @@ edges
| StringLengthConflation.swift:135:36:135:38 | .count : | StringLengthConflation.swift:135:36:135:46 | ... call to -(_:_:) ... |
| StringLengthConflation.swift:141:28:141:30 | .count : | StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... |
nodes
| StringLengthConflation2.swift:37:34:37:36 | .count : | semmle.label | .count : |
| StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... |
| StringLengthConflation.swift:53:43:53:46 | .length | semmle.label | .length |
| StringLengthConflation.swift:60:47:60:50 | .length : | semmle.label | .length : |
| StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | semmle.label | ... call to /(_:_:) ... |
@@ -50,6 +53,7 @@ nodes
| StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... |
subpaths
#select
| StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | StringLengthConflation2.swift:37:34:37:36 | .count : | StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | This String length is used in an NSString, but it may not be equivalent. |
| StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | This NSString length is used in a String, but it may not be equivalent. |
| StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. |
| StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. |

View File

@@ -0,0 +1,42 @@
// This test is in a separate file because we want to test with a slightly different library
// implementation. In this version, some of the functions of `NSString` are in fact implemented
// in a base class `NSStringBase`.
// --- stubs ---
func print(_ items: Any...) {}
typealias unichar = UInt16
class NSObject
{
}
class NSStringBase : NSObject
{
func substring(from: Int) -> String { return "" }
}
class NSString : NSStringBase
{
init(string: String) { length = string.count }
func substring(to: Int) -> String { return "" }
private(set) var length: Int
}
// --- tests ---
func test(s: String) {
let ns = NSString(string: s)
let nstr1 = ns.substring(from: ns.length - 1) // GOOD
let nstr2 = ns.substring(from: s.count - 1) // BAD: String length used in NSString [NOT DETECTED]
let nstr3 = ns.substring(to: ns.length - 1) // GOOD
let nstr4 = ns.substring(to: s.count - 1) // BAD: String length used in NSString
print("substrings '\(nstr1)' '\(nstr2)' / '\(nstr3)' '\(nstr4)'")
}
// `begin :thumbsup: end`, with thumbs up emoji and skin tone modifier
test(s: "begin \u{0001F44D}\u{0001F3FF} end")