diff --git a/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql b/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql index 73fcf034096..7a3877f638c 100644 --- a/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql +++ b/cpp/ql/src/Security/CWE/CWE-078/ExecTainted.ql @@ -77,7 +77,7 @@ class ExecState extends DataFlow::FlowState { ExecState() { this = "ExecState (" + fst.getLocation() + " | " + fst + ", " + snd.getLocation() + " | " + snd + ")" and - interestingConcatenation(fst, snd) + interestingConcatenation(pragma[only_bind_into](fst), pragma[only_bind_into](snd)) } DataFlow::Node getFstNode() { result = fst } diff --git a/csharp/tools/tracing-config.lua b/csharp/tools/tracing-config.lua index b4ff0206b02..8aafc63e7e5 100644 --- a/csharp/tools/tracing-config.lua +++ b/csharp/tools/tracing-config.lua @@ -2,7 +2,54 @@ function RegisterExtractorPack(id) local extractor = GetPlatformToolsDirectory() .. 'Semmle.Extraction.CSharp.Driver' if OperatingSystem == 'windows' then extractor = extractor .. '.exe' end + + function DotnetMatcherBuild(compilerName, compilerPath, compilerArguments, + _languageId) + if compilerName ~= 'dotnet' and compilerName ~= 'dotnet.exe' then + return nil + end + + -- The dotnet CLI has the following usage instructions: + -- dotnet [sdk-options] [command] [command-options] [arguments] + -- we are interested in dotnet build, which has the following usage instructions: + -- dotnet [options] build [...] + -- For now, parse the command line as follows: + -- Everything that starts with `-` (or `/`) will be ignored. + -- The first non-option argument is treated as the command. + -- if that's `build`, we append `/p:UseSharedCompilation=false` to the command line, + -- otherwise we do nothing. + local match = false + local argv = compilerArguments.argv + if OperatingSystem == 'windows' then + -- let's hope that this split matches the escaping rules `dotnet` applies to command line arguments + -- or, at least, that it is close enough + argv = + NativeArgumentsToArgv(compilerArguments.nativeArgumentPointer) + end + for i, arg in ipairs(argv) do + -- dotnet options start with either - or / (both are legal) + local firstCharacter = string.sub(arg, 1, 1) + if not (firstCharacter == '-') and not (firstCharacter == '/') then + Log(1, 'Dotnet subcommand detected: %s', arg) + if arg == 'build' then match = true end + break + end + end + if match then + return { + order = ORDER_REPLACE, + invocation = BuildExtractorInvocation(id, compilerPath, + compilerPath, + compilerArguments, nil, { + '/p:UseSharedCompilation=false' + }) + } + end + return nil + end + local windowsMatchers = { + DotnetMatcherBuild, CreatePatternMatcher({'^dotnet%.exe$'}, MatchCompilerName, extractor, { prepend = {'--dotnetexec', '--cil'}, order = ORDER_BEFORE @@ -10,22 +57,21 @@ function RegisterExtractorPack(id) CreatePatternMatcher({'^csc.*%.exe$'}, MatchCompilerName, extractor, { prepend = {'--compiler', '"${compiler}"', '--cil'}, order = ORDER_BEFORE - }), CreatePatternMatcher({'^fakes.*%.exe$', 'moles.*%.exe'}, MatchCompilerName, nil, {trace = false}) } local posixMatchers = { - CreatePatternMatcher({'^mcs%.exe$', '^csc%.exe$'}, MatchCompilerName, - extractor, { - prepend = {'--compiler', '"${compiler}"', '--cil'}, - order = ORDER_BEFORE - - }), + DotnetMatcherBuild, CreatePatternMatcher({'^mono', '^dotnet$'}, MatchCompilerName, extractor, { prepend = {'--dotnetexec', '--cil'}, order = ORDER_BEFORE + }), + CreatePatternMatcher({'^mcs%.exe$', '^csc%.exe$'}, MatchCompilerName, + extractor, { + prepend = {'--compiler', '"${compiler}"', '--cil'}, + order = ORDER_BEFORE }), function(compilerName, compilerPath, compilerArguments, _languageId) if MatchCompilerName('^msbuild$', compilerName, compilerPath, compilerArguments) or @@ -49,7 +95,6 @@ function RegisterExtractorPack(id) else return posixMatchers end - end -- Return a list of minimum supported versions of the configuration file format diff --git a/docs/codeql/codeql-overview/supported-languages-and-frameworks.rst b/docs/codeql/codeql-overview/supported-languages-and-frameworks.rst index 4353f9402b7..be66125846a 100644 --- a/docs/codeql/codeql-overview/supported-languages-and-frameworks.rst +++ b/docs/codeql/codeql-overview/supported-languages-and-frameworks.rst @@ -11,14 +11,17 @@ CodeQL. Languages and compilers ####################### -CodeQL supports the following languages and compilers. +The current versions of the CodeQL CLI (`changelog `__, `releases `__), +CodeQL library packs (`source `__), +and CodeQL bundle (`releases `__) +support the following languages and compilers. .. include:: ../support/reusables/versions-compilers.rst Frameworks and libraries ######################## -The libraries and queries in the current version of CodeQL have been explicitly checked against the libraries and frameworks listed below. +The current versions of the CodeQL library and query packs (`source `__) have been explicitly checked against the libraries and frameworks listed below. .. pull-quote:: diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index 12bcd5af8e6..fc5410648cf 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -1,6 +1,10 @@ C and C++ built-in support ================================ +Provided by the current versions of the +CodeQL query pack ``codeql/cpp-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/cpp-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -14,6 +18,10 @@ C and C++ built-in support C# built-in support ================================ +Provided by the current versions of the +CodeQL query pack ``codeql/csharp-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/csharp-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -33,6 +41,10 @@ C# built-in support Go built-in support ================================ +Provided by the current versions of the +CodeQL query pack ``codeql/go-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/go-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -84,6 +96,10 @@ Go built-in support Java built-in support ================================== +Provided by the current versions of the +CodeQL query pack ``codeql/java-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/java-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -113,6 +129,10 @@ Java built-in support JavaScript and TypeScript built-in support ======================================================= +Provided by the current versions of the +CodeQL query pack ``codeql/javascript-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/javascript-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable @@ -156,6 +176,10 @@ JavaScript and TypeScript built-in support Python built-in support ==================================== +Provided by the current versions of the +CodeQL query pack ``codeql/python-queries`` (`changelog `__, `source `__) +and the CodeQL library pack ``codeql/python-all`` (`changelog `__, `source `__). + .. csv-table:: :header-rows: 1 :class: fullWidthTable diff --git a/ruby/Cargo.lock b/ruby/Cargo.lock index c92ef0214a6..2e2f205ce02 100644 Binary files a/ruby/Cargo.lock and b/ruby/Cargo.lock differ diff --git a/ruby/extractor/Cargo.toml b/ruby/extractor/Cargo.toml index 071b1162063..c1a7c4424b9 100644 --- a/ruby/extractor/Cargo.toml +++ b/ruby/extractor/Cargo.toml @@ -11,7 +11,7 @@ flate2 = "1.0" node-types = { path = "../node-types" } tree-sitter = "0.19" tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "1a538da253d73f896b9f6c0c7d79cda58791ac5c" } -tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "5b305c3cd32db10494cedd2743de6bbe32f1a573" } +tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "e75d04404c9dd71ad68850d5c672b226d5e694f3" } clap = "3.0" tracing = "0.1" tracing-subscriber = { version = "0.3.3", features = ["env-filter"] } diff --git a/ruby/extractor/src/extractor.rs b/ruby/extractor/src/extractor.rs index 8cdaff1b738..db280634ae5 100644 --- a/ruby/extractor/src/extractor.rs +++ b/ruby/extractor/src/extractor.rs @@ -1,161 +1,112 @@ +use crate::trap; use node_types::{EntryKind, Field, NodeTypeMap, Storage, TypeName}; -use std::borrow::Cow; use std::collections::BTreeMap as Map; use std::collections::BTreeSet as Set; use std::fmt; -use std::io::Write; use std::path::Path; use tracing::{error, info, span, Level}; use tree_sitter::{Language, Node, Parser, Range, Tree}; -pub struct TrapWriter { - /// The accumulated trap entries - trap_output: Vec, - /// A counter for generating fresh labels - counter: u32, - /// cache of global keys - global_keys: std::collections::HashMap, +pub fn populate_file(writer: &mut trap::Writer, absolute_path: &Path) -> trap::Label { + let (file_label, fresh) = + writer.global_id(&trap::full_id_for_file(&normalize_path(absolute_path))); + if fresh { + writer.add_tuple( + "files", + vec![ + trap::Arg::Label(file_label), + trap::Arg::String(normalize_path(absolute_path)), + ], + ); + populate_parent_folders(writer, file_label, absolute_path.parent()); + } + file_label } -pub fn new_trap_writer() -> TrapWriter { - TrapWriter { - counter: 0, - trap_output: Vec::new(), - global_keys: std::collections::HashMap::new(), +fn populate_empty_file(writer: &mut trap::Writer) -> trap::Label { + let (file_label, fresh) = writer.global_id("empty;sourcefile"); + if fresh { + writer.add_tuple( + "files", + vec![ + trap::Arg::Label(file_label), + trap::Arg::String("".to_string()), + ], + ); } + file_label } -impl TrapWriter { - /// Gets a label that will hold the unique ID of the passed string at import time. - /// This can be used for incrementally importable TRAP files -- use globally unique - /// strings to compute a unique ID for table tuples. - /// - /// Note: You probably want to make sure that the key strings that you use are disjoint - /// for disjoint column types; the standard way of doing this is to prefix (or append) - /// the column type name to the ID. Thus, you might identify methods in Java by the - /// full ID "methods_com.method.package.DeclaringClass.method(argumentList)". +pub fn populate_empty_location(writer: &mut trap::Writer) { + let file_label = populate_empty_file(writer); + location(writer, file_label, 0, 0, 0, 0); +} - fn fresh_id(&mut self) -> Label { - let label = Label(self.counter); - self.counter += 1; - self.trap_output.push(TrapEntry::FreshId(label)); - label - } - - fn global_id(&mut self, key: &str) -> (Label, bool) { - if let Some(label) = self.global_keys.get(key) { - return (*label, false); - } - let label = Label(self.counter); - self.counter += 1; - self.global_keys.insert(key.to_owned(), label); - self.trap_output - .push(TrapEntry::MapLabelToKey(label, key.to_owned())); - (label, true) - } - - fn add_tuple(&mut self, table_name: &str, args: Vec) { - self.trap_output - .push(TrapEntry::GenericTuple(table_name.to_owned(), args)) - } - - fn populate_file(&mut self, absolute_path: &Path) -> Label { - let (file_label, fresh) = self.global_id(&full_id_for_file(absolute_path)); - if fresh { - self.add_tuple( - "files", - vec![ - Arg::Label(file_label), - Arg::String(normalize_path(absolute_path)), - ], - ); - self.populate_parent_folders(file_label, absolute_path.parent()); - } - file_label - } - - fn populate_empty_file(&mut self) -> Label { - let (file_label, fresh) = self.global_id("empty;sourcefile"); - if fresh { - self.add_tuple( - "files", - vec![Arg::Label(file_label), Arg::String("".to_string())], - ); - } - file_label - } - - pub fn populate_empty_location(&mut self) { - let file_label = self.populate_empty_file(); - self.location(file_label, 0, 0, 0, 0); - } - - fn populate_parent_folders(&mut self, child_label: Label, path: Option<&Path>) { - let mut path = path; - let mut child_label = child_label; - loop { - match path { - None => break, - Some(folder) => { - let (folder_label, fresh) = self.global_id(&full_id_for_folder(folder)); - self.add_tuple( - "containerparent", - vec![Arg::Label(folder_label), Arg::Label(child_label)], +pub fn populate_parent_folders( + writer: &mut trap::Writer, + child_label: trap::Label, + path: Option<&Path>, +) { + let mut path = path; + let mut child_label = child_label; + loop { + match path { + None => break, + Some(folder) => { + let (folder_label, fresh) = + writer.global_id(&trap::full_id_for_folder(&normalize_path(folder))); + writer.add_tuple( + "containerparent", + vec![ + trap::Arg::Label(folder_label), + trap::Arg::Label(child_label), + ], + ); + if fresh { + writer.add_tuple( + "folders", + vec![ + trap::Arg::Label(folder_label), + trap::Arg::String(normalize_path(folder)), + ], ); - if fresh { - self.add_tuple( - "folders", - vec![ - Arg::Label(folder_label), - Arg::String(normalize_path(folder)), - ], - ); - path = folder.parent(); - child_label = folder_label; - } else { - break; - } + path = folder.parent(); + child_label = folder_label; + } else { + break; } } } } +} - fn location( - &mut self, - file_label: Label, - start_line: usize, - start_column: usize, - end_line: usize, - end_column: usize, - ) -> Label { - let (loc_label, fresh) = self.global_id(&format!( - "loc,{{{}}},{},{},{},{}", - file_label, start_line, start_column, end_line, end_column - )); - if fresh { - self.add_tuple( - "locations_default", - vec![ - Arg::Label(loc_label), - Arg::Label(file_label), - Arg::Int(start_line), - Arg::Int(start_column), - Arg::Int(end_line), - Arg::Int(end_column), - ], - ); - } - loc_label - } - - fn comment(&mut self, text: String) { - self.trap_output.push(TrapEntry::Comment(text)); - } - - pub fn output(self, writer: &mut dyn Write) -> std::io::Result<()> { - write!(writer, "{}", Program(self.trap_output)) +fn location( + writer: &mut trap::Writer, + file_label: trap::Label, + start_line: usize, + start_column: usize, + end_line: usize, + end_column: usize, +) -> trap::Label { + let (loc_label, fresh) = writer.global_id(&format!( + "loc,{{{}}},{},{},{},{}", + file_label, start_line, start_column, end_line, end_column + )); + if fresh { + writer.add_tuple( + "locations_default", + vec![ + trap::Arg::Label(loc_label), + trap::Arg::Label(file_label), + trap::Arg::Int(start_line), + trap::Arg::Int(start_column), + trap::Arg::Int(end_line), + trap::Arg::Int(end_column), + ], + ); } + loc_label } /// Extracts the source file at `path`, which is assumed to be canonicalized. @@ -163,71 +114,43 @@ pub fn extract( language: Language, language_prefix: &str, schema: &NodeTypeMap, - trap_writer: &mut TrapWriter, + trap_writer: &mut trap::Writer, path: &Path, source: &[u8], ranges: &[Range], ) -> std::io::Result<()> { + let path_str = format!("{}", path.display()); let span = span!( Level::TRACE, "extract", - file = %path.display() + file = %path_str ); let _enter = span.enter(); - info!("extracting: {}", path.display()); + info!("extracting: {}", path_str); let mut parser = Parser::new(); parser.set_language(language).unwrap(); parser.set_included_ranges(ranges).unwrap(); let tree = parser.parse(&source, None).expect("Failed to parse file"); - trap_writer.comment(format!("Auto-generated TRAP file for {}", path.display())); - let file_label = &trap_writer.populate_file(path); - let mut visitor = Visitor { + trap_writer.comment(format!("Auto-generated TRAP file for {}", path_str)); + let file_label = populate_file(trap_writer, path); + let mut visitor = Visitor::new( source, trap_writer, // TODO: should we handle path strings that are not valid UTF8 better? - path: format!("{}", path.display()), - file_label: *file_label, - toplevel_child_counter: 0, - stack: Vec::new(), + &path_str, + file_label, language_prefix, schema, - }; + ); traverse(&tree, &mut visitor); parser.reset(); Ok(()) } -/// Escapes a string for use in a TRAP key, by replacing special characters with -/// HTML entities. -fn escape_key<'a, S: Into>>(key: S) -> Cow<'a, str> { - fn needs_escaping(c: char) -> bool { - matches!(c, '&' | '{' | '}' | '"' | '@' | '#') - } - - let key = key.into(); - if key.contains(needs_escaping) { - let mut escaped = String::with_capacity(2 * key.len()); - for c in key.chars() { - match c { - '&' => escaped.push_str("&"), - '{' => escaped.push_str("{"), - '}' => escaped.push_str("}"), - '"' => escaped.push_str("""), - '@' => escaped.push_str("@"), - '#' => escaped.push_str("#"), - _ => escaped.push(c), - } - } - Cow::Owned(escaped) - } else { - key - } -} - /// Normalizes the path according the common CodeQL specification. Assumes that /// `path` has already been canonicalized using `std::fs::canonicalize`. fn normalize_path(path: &Path) -> String { @@ -267,34 +190,28 @@ fn normalize_path(path: &Path) -> String { } } -fn full_id_for_file(path: &Path) -> String { - format!("{};sourcefile", escape_key(&normalize_path(path))) -} - -fn full_id_for_folder(path: &Path) -> String { - format!("{};folder", escape_key(&normalize_path(path))) -} - struct ChildNode { field_name: Option<&'static str>, - label: Label, + label: trap::Label, type_name: TypeName, } struct Visitor<'a> { /// The file path of the source code (as string) - path: String, + path: &'a str, /// The label to use whenever we need to refer to the `@file` entity of this /// source file. - file_label: Label, + file_label: trap::Label, /// The source code as a UTF-8 byte array source: &'a [u8], - /// A TrapWriter to accumulate trap entries - trap_writer: &'a mut TrapWriter, + /// A trap::Writer to accumulate trap entries + trap_writer: &'a mut trap::Writer, /// A counter for top-level child nodes toplevel_child_counter: usize, - /// Language prefix - language_prefix: &'a str, + /// Language-specific name of the AST info table + ast_node_info_table_name: String, + /// Language-specific name of the tokeninfo table + tokeninfo_table_name: String, /// A lookup table from type name to node types schema: &'a NodeTypeMap, /// A stack for gathering information from child nodes. Whenever a node is @@ -303,27 +220,48 @@ struct Visitor<'a> { /// node the list containing the child data is popped from the stack and /// matched against the dbscheme for the node. If the expectations are met /// the corresponding row definitions are added to the trap_output. - stack: Vec<(Label, usize, Vec)>, + stack: Vec<(trap::Label, usize, Vec)>, } -impl Visitor<'_> { +impl<'a> Visitor<'a> { + fn new( + source: &'a [u8], + trap_writer: &'a mut trap::Writer, + path: &'a str, + file_label: trap::Label, + language_prefix: &str, + schema: &'a NodeTypeMap, + ) -> Visitor<'a> { + Visitor { + path, + file_label, + source, + trap_writer, + toplevel_child_counter: 0, + ast_node_info_table_name: format!("{}_ast_node_info", language_prefix), + tokeninfo_table_name: format!("{}_tokeninfo", language_prefix), + schema, + stack: Vec::new(), + } + } + fn record_parse_error( &mut self, error_message: String, full_error_message: String, - loc: Label, + loc: trap::Label, ) { error!("{}", full_error_message); let id = self.trap_writer.fresh_id(); self.trap_writer.add_tuple( "diagnostics", vec![ - Arg::Label(id), - Arg::Int(40), // severity 40 = error - Arg::String("parse_error".to_string()), - Arg::String(error_message), - Arg::String(full_error_message), - Arg::Label(loc), + trap::Arg::Label(id), + trap::Arg::Int(40), // severity 40 = error + trap::Arg::String("parse_error".to_string()), + trap::Arg::String(error_message), + trap::Arg::String(full_error_message), + trap::Arg::Label(loc), ], ); } @@ -335,7 +273,8 @@ impl Visitor<'_> { node: Node, ) { let (start_line, start_column, end_line, end_column) = location_for(self.source, node); - let loc = self.trap_writer.location( + let loc = location( + self.trap_writer, self.file_label, start_line, start_column, @@ -374,7 +313,8 @@ impl Visitor<'_> { } let (id, _, child_nodes) = self.stack.pop().expect("Vistor: empty stack"); let (start_line, start_column, end_line, end_column) = location_for(self.source, node); - let loc = self.trap_writer.location( + let loc = location( + self.trap_writer, self.file_label, start_line, start_column, @@ -402,19 +342,19 @@ impl Visitor<'_> { match &table.kind { EntryKind::Token { kind_id, .. } => { self.trap_writer.add_tuple( - &format!("{}_ast_node_info", self.language_prefix), + &self.ast_node_info_table_name, vec![ - Arg::Label(id), - Arg::Label(parent_id), - Arg::Int(parent_index), - Arg::Label(loc), + trap::Arg::Label(id), + trap::Arg::Label(parent_id), + trap::Arg::Int(parent_index), + trap::Arg::Label(loc), ], ); self.trap_writer.add_tuple( - &format!("{}_tokeninfo", self.language_prefix), + &self.tokeninfo_table_name, vec![ - Arg::Label(id), - Arg::Int(*kind_id), + trap::Arg::Label(id), + trap::Arg::Int(*kind_id), sliced_source_arg(self.source, node), ], ); @@ -425,15 +365,15 @@ impl Visitor<'_> { } => { if let Some(args) = self.complex_node(&node, fields, &child_nodes, id) { self.trap_writer.add_tuple( - &format!("{}_ast_node_info", self.language_prefix), + &self.ast_node_info_table_name, vec![ - Arg::Label(id), - Arg::Label(parent_id), - Arg::Int(parent_index), - Arg::Label(loc), + trap::Arg::Label(id), + trap::Arg::Label(parent_id), + trap::Arg::Int(parent_index), + trap::Arg::Label(loc), ], ); - let mut all_args = vec![Arg::Label(id)]; + let mut all_args = vec![trap::Arg::Label(id)]; all_args.extend(args); self.trap_writer.add_tuple(table_name, all_args); } @@ -472,9 +412,9 @@ impl Visitor<'_> { node: &Node, fields: &[Field], child_nodes: &[ChildNode], - parent_id: Label, - ) -> Option> { - let mut map: Map<&Option, (&Field, Vec)> = Map::new(); + parent_id: trap::Label, + ) -> Option> { + let mut map: Map<&Option, (&Field, Vec)> = Map::new(); for field in fields { map.insert(&field.name, (field, Vec::new())); } @@ -488,9 +428,9 @@ impl Visitor<'_> { { // We can safely unwrap because type_matches checks the key is in the map. let (int_value, _) = int_mapping.get(&child_node.type_name.kind).unwrap(); - values.push(Arg::Int(*int_value)); + values.push(trap::Arg::Int(*int_value)); } else { - values.push(Arg::Label(child_node.label)); + values.push(trap::Arg::Label(child_node.label)); } } else if field.name.is_some() { let error_message = format!( @@ -569,9 +509,9 @@ impl Visitor<'_> { ); break; } - let mut args = vec![Arg::Label(parent_id)]; + let mut args = vec![trap::Arg::Label(parent_id)]; if *has_index { - args.push(Arg::Int(index)) + args.push(trap::Arg::Int(index)) } args.push(child_value.clone()); self.trap_writer.add_tuple(table_name, args); @@ -625,9 +565,9 @@ impl Visitor<'_> { } // Emit a slice of a source file as an Arg. -fn sliced_source_arg(source: &[u8], n: Node) -> Arg { +fn sliced_source_arg(source: &[u8], n: Node) -> trap::Arg { let range = n.byte_range(); - Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned()) + trap::Arg::String(String::from_utf8_lossy(&source[range.start..range.end]).into_owned()) } // Emit a pair of `TrapEntry`s for the provided node, appropriately calibrated. @@ -699,59 +639,6 @@ fn traverse(tree: &Tree, visitor: &mut Visitor) { } } -pub struct Program(Vec); - -impl fmt::Display for Program { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut text = String::new(); - for trap_entry in &self.0 { - text.push_str(&format!("{}\n", trap_entry)); - } - write!(f, "{}", text) - } -} - -enum TrapEntry { - /// Maps the label to a fresh id, e.g. `#123=*`. - FreshId(Label), - /// Maps the label to a key, e.g. `#7=@"foo"`. - MapLabelToKey(Label, String), - /// foo_bar(arg*) - GenericTuple(String, Vec), - Comment(String), -} -impl fmt::Display for TrapEntry { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - TrapEntry::FreshId(label) => write!(f, "{}=*", label), - TrapEntry::MapLabelToKey(label, key) => { - write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\"")) - } - TrapEntry::GenericTuple(name, args) => { - write!(f, "{}(", name)?; - for (index, arg) in args.iter().enumerate() { - if index > 0 { - write!(f, ",")?; - } - write!(f, "{}", arg)?; - } - write!(f, ")") - } - TrapEntry::Comment(line) => write!(f, "// {}", line), - } - } -} - -#[derive(Debug, Copy, Clone)] -// Identifiers of the form #0, #1... -struct Label(u32); - -impl fmt::Display for Label { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "#{:x}", self.0) - } -} - // Numeric indices. #[derive(Debug, Copy, Clone)] struct Index(usize); @@ -761,69 +648,3 @@ impl fmt::Display for Index { write!(f, "{}", self.0) } } - -// Some untyped argument to a TrapEntry. -#[derive(Debug, Clone)] -enum Arg { - Label(Label), - Int(usize), - String(String), -} - -const MAX_STRLEN: usize = 1048576; - -impl fmt::Display for Arg { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Arg::Label(x) => write!(f, "{}", x), - Arg::Int(x) => write!(f, "{}", x), - Arg::String(x) => write!( - f, - "\"{}\"", - limit_string(x, MAX_STRLEN).replace("\"", "\"\"") - ), - } - } -} - -/// Limit the length (in bytes) of a string. If the string's length in bytes is -/// less than or equal to the limit then the entire string is returned. Otherwise -/// the string is sliced at the provided limit. If there is a multi-byte character -/// at the limit then the returned slice will be slightly shorter than the limit to -/// avoid splitting that multi-byte character. -fn limit_string(string: &str, max_size: usize) -> &str { - if string.len() <= max_size { - return string; - } - let p = string.as_bytes(); - let mut index = max_size; - // We want to clip the string at [max_size]; however, the character at that position - // may span several bytes. We need to find the first byte of the character. In UTF-8 - // encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte. - // Therefore we decrement the index as long as there are bytes matching this pattern. - // This ensures we cut the string at the border between one character and another. - while index > 0 && (p[index] & 0b11000000) == 0b10000000 { - index -= 1; - } - &string[0..index] -} - -#[test] -fn limit_string_test() { - assert_eq!("hello", limit_string(&"hello world".to_owned(), 5)); - assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6)); - assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5)); -} - -#[test] -fn escape_key_test() { - assert_eq!("foo!", escape_key("foo!")); - assert_eq!("foo{}", escape_key("foo{}")); - assert_eq!("{}", escape_key("{}")); - assert_eq!("", escape_key("")); - assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb")); - assert_eq!( - "/path/to/foo&{}"@#.rb", - escape_key("/path/to/foo&{}\"@#.rb") - ); -} diff --git a/ruby/extractor/src/main.rs b/ruby/extractor/src/main.rs index 359f84c8077..a3ed7ed7933 100644 --- a/ruby/extractor/src/main.rs +++ b/ruby/extractor/src/main.rs @@ -1,4 +1,5 @@ mod extractor; +mod trap; #[macro_use] extern crate lazy_static; @@ -6,50 +7,13 @@ extern crate num_cpus; use clap::arg; use encoding::{self}; -use flate2::write::GzEncoder; use rayon::prelude::*; use std::borrow::Cow; use std::fs; -use std::io::{BufRead, BufWriter}; +use std::io::BufRead; use std::path::{Path, PathBuf}; use tree_sitter::{Language, Parser, Range}; -enum TrapCompression { - None, - Gzip, -} - -impl TrapCompression { - fn from_env() -> TrapCompression { - match std::env::var("CODEQL_RUBY_TRAP_COMPRESSION") { - Ok(method) => match TrapCompression::from_string(&method) { - Some(c) => c, - None => { - tracing::error!("Unknown compression method '{}'; using gzip.", &method); - TrapCompression::Gzip - } - }, - // Default compression method if the env var isn't set: - Err(_) => TrapCompression::Gzip, - } - } - - fn from_string(s: &str) -> Option { - match s.to_lowercase().as_ref() { - "none" => Some(TrapCompression::None), - "gzip" => Some(TrapCompression::Gzip), - _ => None, - } - } - - fn extension(&self) -> &str { - match self { - TrapCompression::None => "trap", - TrapCompression::Gzip => "trap.gz", - } - } -} - /** * Gets the number of threads the extractor should use, by reading the * CODEQL_THREADS environment variable and using it as described in the @@ -141,7 +105,7 @@ fn main() -> std::io::Result<()> { .value_of("output-dir") .expect("missing --output-dir"); let trap_dir = PathBuf::from(trap_dir); - let trap_compression = TrapCompression::from_env(); + let trap_compression = trap::Compression::from_env("CODEQL_RUBY_TRAP_COMPRESSION"); let file_list = matches.value_of("file-list").expect("missing --file-list"); let file_list = fs::File::open(file_list)?; @@ -165,7 +129,7 @@ fn main() -> std::io::Result<()> { let mut source = std::fs::read(&path)?; let mut needs_conversion = false; let code_ranges; - let mut trap_writer = extractor::new_trap_writer(); + let mut trap_writer = trap::Writer::new(); if path.extension().map_or(false, |x| x == "erb") { tracing::info!("scanning: {}", path.display()); extractor::extract( @@ -246,33 +210,25 @@ fn main() -> std::io::Result<()> { } else { std::fs::copy(&path, &src_archive_file)?; } - write_trap(&trap_dir, path, trap_writer, &trap_compression) + write_trap(&trap_dir, path, &trap_writer, trap_compression) }) .expect("failed to extract files"); let path = PathBuf::from("extras"); - let mut trap_writer = extractor::new_trap_writer(); - trap_writer.populate_empty_location(); - write_trap(&trap_dir, path, trap_writer, &trap_compression) + let mut trap_writer = trap::Writer::new(); + extractor::populate_empty_location(&mut trap_writer); + write_trap(&trap_dir, path, &trap_writer, trap_compression) } fn write_trap( trap_dir: &Path, path: PathBuf, - trap_writer: extractor::TrapWriter, - trap_compression: &TrapCompression, + trap_writer: &trap::Writer, + trap_compression: trap::Compression, ) -> std::io::Result<()> { let trap_file = path_for(trap_dir, &path, trap_compression.extension()); std::fs::create_dir_all(&trap_file.parent().unwrap())?; - let trap_file = std::fs::File::create(&trap_file)?; - let mut trap_file = BufWriter::new(trap_file); - match trap_compression { - TrapCompression::None => trap_writer.output(&mut trap_file), - TrapCompression::Gzip => { - let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast()); - trap_writer.output(&mut compressed_writer) - } - } + trap_writer.write_to_file(&trap_file, trap_compression) } fn scan_erb( diff --git a/ruby/extractor/src/trap.rs b/ruby/extractor/src/trap.rs new file mode 100644 index 00000000000..d64c520c4cc --- /dev/null +++ b/ruby/extractor/src/trap.rs @@ -0,0 +1,272 @@ +use std::borrow::Cow; +use std::fmt; +use std::io::{BufWriter, Write}; +use std::path::Path; + +use flate2::write::GzEncoder; + +pub struct Writer { + /// The accumulated trap entries + trap_output: Vec, + /// A counter for generating fresh labels + counter: u32, + /// cache of global keys + global_keys: std::collections::HashMap, +} + +impl Writer { + pub fn new() -> Writer { + Writer { + counter: 0, + trap_output: Vec::new(), + global_keys: std::collections::HashMap::new(), + } + } + + pub fn fresh_id(&mut self) -> Label { + let label = Label(self.counter); + self.counter += 1; + self.trap_output.push(Entry::FreshId(label)); + label + } + + /// Gets a label that will hold the unique ID of the passed string at import time. + /// This can be used for incrementally importable TRAP files -- use globally unique + /// strings to compute a unique ID for table tuples. + /// + /// Note: You probably want to make sure that the key strings that you use are disjoint + /// for disjoint column types; the standard way of doing this is to prefix (or append) + /// the column type name to the ID. Thus, you might identify methods in Java by the + /// full ID "methods_com.method.package.DeclaringClass.method(argumentList)". + pub fn global_id(&mut self, key: &str) -> (Label, bool) { + if let Some(label) = self.global_keys.get(key) { + return (*label, false); + } + let label = Label(self.counter); + self.counter += 1; + self.global_keys.insert(key.to_owned(), label); + self.trap_output + .push(Entry::MapLabelToKey(label, key.to_owned())); + (label, true) + } + + pub fn add_tuple(&mut self, table_name: &str, args: Vec) { + self.trap_output + .push(Entry::GenericTuple(table_name.to_owned(), args)) + } + + pub fn comment(&mut self, text: String) { + self.trap_output.push(Entry::Comment(text)); + } + + pub fn write_to_file(&self, path: &Path, compression: Compression) -> std::io::Result<()> { + let trap_file = std::fs::File::create(path)?; + let mut trap_file = BufWriter::new(trap_file); + match compression { + Compression::None => self.write_trap_entries(&mut trap_file), + Compression::Gzip => { + let mut compressed_writer = GzEncoder::new(trap_file, flate2::Compression::fast()); + self.write_trap_entries(&mut compressed_writer) + } + } + } + + fn write_trap_entries(&self, file: &mut W) -> std::io::Result<()> { + for trap_entry in &self.trap_output { + writeln!(file, "{}", trap_entry)?; + } + std::io::Result::Ok(()) + } +} + +pub enum Entry { + /// Maps the label to a fresh id, e.g. `#123=*`. + FreshId(Label), + /// Maps the label to a key, e.g. `#7=@"foo"`. + MapLabelToKey(Label, String), + /// foo_bar(arg*) + GenericTuple(String, Vec), + Comment(String), +} + +impl fmt::Display for Entry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Entry::FreshId(label) => write!(f, "{}=*", label), + Entry::MapLabelToKey(label, key) => { + write!(f, "{}=@\"{}\"", label, key.replace("\"", "\"\"")) + } + Entry::GenericTuple(name, args) => { + write!(f, "{}(", name)?; + for (index, arg) in args.iter().enumerate() { + if index > 0 { + write!(f, ",")?; + } + write!(f, "{}", arg)?; + } + write!(f, ")") + } + Entry::Comment(line) => write!(f, "// {}", line), + } + } +} + +#[derive(Debug, Copy, Clone)] +// Identifiers of the form #0, #1... +pub struct Label(u32); + +impl fmt::Display for Label { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "#{:x}", self.0) + } +} + +// Some untyped argument to a TrapEntry. +#[derive(Debug, Clone)] +pub enum Arg { + Label(Label), + Int(usize), + String(String), +} + +const MAX_STRLEN: usize = 1048576; + +impl fmt::Display for Arg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Arg::Label(x) => write!(f, "{}", x), + Arg::Int(x) => write!(f, "{}", x), + Arg::String(x) => write!( + f, + "\"{}\"", + limit_string(x, MAX_STRLEN).replace("\"", "\"\"") + ), + } + } +} + +pub struct Program(Vec); + +impl fmt::Display for Program { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut text = String::new(); + for trap_entry in &self.0 { + text.push_str(&format!("{}\n", trap_entry)); + } + write!(f, "{}", text) + } +} + +pub fn full_id_for_file(normalized_path: &str) -> String { + format!("{};sourcefile", escape_key(normalized_path)) +} + +pub fn full_id_for_folder(normalized_path: &str) -> String { + format!("{};folder", escape_key(normalized_path)) +} + +/// Escapes a string for use in a TRAP key, by replacing special characters with +/// HTML entities. +fn escape_key<'a, S: Into>>(key: S) -> Cow<'a, str> { + fn needs_escaping(c: char) -> bool { + matches!(c, '&' | '{' | '}' | '"' | '@' | '#') + } + + let key = key.into(); + if key.contains(needs_escaping) { + let mut escaped = String::with_capacity(2 * key.len()); + for c in key.chars() { + match c { + '&' => escaped.push_str("&"), + '{' => escaped.push_str("{"), + '}' => escaped.push_str("}"), + '"' => escaped.push_str("""), + '@' => escaped.push_str("@"), + '#' => escaped.push_str("#"), + _ => escaped.push(c), + } + } + Cow::Owned(escaped) + } else { + key + } +} + +/// Limit the length (in bytes) of a string. If the string's length in bytes is +/// less than or equal to the limit then the entire string is returned. Otherwise +/// the string is sliced at the provided limit. If there is a multi-byte character +/// at the limit then the returned slice will be slightly shorter than the limit to +/// avoid splitting that multi-byte character. +fn limit_string(string: &str, max_size: usize) -> &str { + if string.len() <= max_size { + return string; + } + let p = string.as_bytes(); + let mut index = max_size; + // We want to clip the string at [max_size]; however, the character at that position + // may span several bytes. We need to find the first byte of the character. In UTF-8 + // encoded data any byte that matches the bit pattern 10XXXXXX is not a start byte. + // Therefore we decrement the index as long as there are bytes matching this pattern. + // This ensures we cut the string at the border between one character and another. + while index > 0 && (p[index] & 0b11000000) == 0b10000000 { + index -= 1; + } + &string[0..index] +} + +#[derive(Clone, Copy)] +pub enum Compression { + None, + Gzip, +} + +impl Compression { + pub fn from_env(var_name: &str) -> Compression { + match std::env::var(var_name) { + Ok(method) => match Compression::from_string(&method) { + Some(c) => c, + None => { + tracing::error!("Unknown compression method '{}'; using gzip.", &method); + Compression::Gzip + } + }, + // Default compression method if the env var isn't set: + Err(_) => Compression::Gzip, + } + } + + pub fn from_string(s: &str) -> Option { + match s.to_lowercase().as_ref() { + "none" => Some(Compression::None), + "gzip" => Some(Compression::Gzip), + _ => None, + } + } + + pub fn extension(&self) -> &str { + match self { + Compression::None => "trap", + Compression::Gzip => "trap.gz", + } + } +} + +#[test] +fn limit_string_test() { + assert_eq!("hello", limit_string(&"hello world".to_owned(), 5)); + assert_eq!("hi ☹", limit_string(&"hi ☹☹".to_owned(), 6)); + assert_eq!("hi ", limit_string(&"hi ☹☹".to_owned(), 5)); +} + +#[test] +fn escape_key_test() { + assert_eq!("foo!", escape_key("foo!")); + assert_eq!("foo{}", escape_key("foo{}")); + assert_eq!("{}", escape_key("{}")); + assert_eq!("", escape_key("")); + assert_eq!("/path/to/foo.rb", escape_key("/path/to/foo.rb")); + assert_eq!( + "/path/to/foo&{}"@#.rb", + escape_key("/path/to/foo&{}\"@#.rb") + ); +} diff --git a/ruby/generator/Cargo.toml b/ruby/generator/Cargo.toml index 806dce9d609..5a70c43d7b3 100644 --- a/ruby/generator/Cargo.toml +++ b/ruby/generator/Cargo.toml @@ -12,4 +12,4 @@ node-types = { path = "../node-types" } tracing = "0.1" tracing-subscriber = { version = "0.3.3", features = ["env-filter"] } tree-sitter-embedded-template = { git = "https://github.com/tree-sitter/tree-sitter-embedded-template.git", rev = "1a538da253d73f896b9f6c0c7d79cda58791ac5c" } -tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "5b305c3cd32db10494cedd2743de6bbe32f1a573" } +tree-sitter-ruby = { git = "https://github.com/tree-sitter/tree-sitter-ruby.git", rev = "e75d04404c9dd71ad68850d5c672b226d5e694f3" } diff --git a/ruby/ql/lib/change-notes/2022-07-18-sqli-in-activerecord-relation-annotate.md b/ruby/ql/lib/change-notes/2022-07-18-sqli-in-activerecord-relation-annotate.md new file mode 100644 index 00000000000..60ab137f8b2 --- /dev/null +++ b/ruby/ql/lib/change-notes/2022-07-18-sqli-in-activerecord-relation-annotate.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- + +- Calls to `ActiveRecord::Relation#annotate` are now recognized as`SqlExecution`s so that it will be considered as a sink for queries like rb/sql-injection. \ No newline at end of file diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index cc63e64a083..142d1455ce4 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -133,6 +133,11 @@ private Expr sqlFragmentArgument(MethodCall call) { or methodName = "reload" and result = call.getKeywordArgument("lock") + or + // Calls to `annotate` can be used to add block comments to SQL queries. These are potentially vulnerable to + // SQLi if user supplied input is passed in as an argument. + methodName = "annotate" and + result = call.getArgument(_) ) ) } diff --git a/ruby/ql/test/library-tests/frameworks/ActionController.expected b/ruby/ql/test/library-tests/frameworks/ActionController.expected index d306f09b64b..52ab15995c7 100644 --- a/ruby/ql/test/library-tests/frameworks/ActionController.expected +++ b/ruby/ql/test/library-tests/frameworks/ActionController.expected @@ -2,6 +2,7 @@ actionControllerControllerClasses | ActiveRecord.rb:23:1:39:3 | FooController | | ActiveRecord.rb:41:1:64:3 | BarController | | ActiveRecord.rb:66:1:70:3 | BazController | +| ActiveRecord.rb:72:1:80:3 | AnnotatedController | | app/controllers/comments_controller.rb:1:1:7:3 | CommentsController | | app/controllers/foo/bars_controller.rb:3:1:39:3 | BarsController | | app/controllers/photos_controller.rb:1:1:4:3 | PhotosController | @@ -12,6 +13,8 @@ actionControllerActionMethods | ActiveRecord.rb:42:3:47:5 | some_other_request_handler | | ActiveRecord.rb:49:3:63:5 | safe_paths | | ActiveRecord.rb:67:3:69:5 | yet_another_handler | +| ActiveRecord.rb:73:3:75:5 | index | +| ActiveRecord.rb:77:3:79:5 | unsafe_action | | app/controllers/comments_controller.rb:2:3:3:5 | index | | app/controllers/comments_controller.rb:5:3:6:5 | show | | app/controllers/foo/bars_controller.rb:5:3:7:5 | index | @@ -38,6 +41,7 @@ paramsCalls | ActiveRecord.rb:59:12:59:17 | call to params | | ActiveRecord.rb:62:15:62:20 | call to params | | ActiveRecord.rb:68:21:68:26 | call to params | +| ActiveRecord.rb:78:59:78:64 | call to params | | app/controllers/foo/bars_controller.rb:13:21:13:26 | call to params | | app/controllers/foo/bars_controller.rb:14:10:14:15 | call to params | | app/controllers/foo/bars_controller.rb:21:21:21:26 | call to params | @@ -57,6 +61,7 @@ paramsSources | ActiveRecord.rb:59:12:59:17 | call to params | | ActiveRecord.rb:62:15:62:20 | call to params | | ActiveRecord.rb:68:21:68:26 | call to params | +| ActiveRecord.rb:78:59:78:64 | call to params | | app/controllers/foo/bars_controller.rb:13:21:13:26 | call to params | | app/controllers/foo/bars_controller.rb:14:10:14:15 | call to params | | app/controllers/foo/bars_controller.rb:21:21:21:26 | call to params | diff --git a/ruby/ql/test/library-tests/frameworks/ActiveRecord.expected b/ruby/ql/test/library-tests/frameworks/ActiveRecord.expected index d8509f6a218..b416d853440 100644 --- a/ruby/ql/test/library-tests/frameworks/ActiveRecord.expected +++ b/ruby/ql/test/library-tests/frameworks/ActiveRecord.expected @@ -22,6 +22,7 @@ activeRecordSqlExecutionRanges | ActiveRecord.rb:46:20:46:32 | ... + ... | | ActiveRecord.rb:52:16:52:28 | "name #{...}" | | ActiveRecord.rb:56:20:56:39 | "username = #{...}" | +| ActiveRecord.rb:78:27:78:76 | "this is an unsafe annotation:..." | activeRecordModelClassMethodCalls | ActiveRecord.rb:2:3:2:17 | call to has_many | | ActiveRecord.rb:6:3:6:24 | call to belongs_to | @@ -44,6 +45,8 @@ activeRecordModelClassMethodCalls | ActiveRecord.rb:60:5:60:33 | call to find_by | | ActiveRecord.rb:62:5:62:34 | call to find | | ActiveRecord.rb:68:5:68:45 | call to delete_by | +| ActiveRecord.rb:74:13:74:54 | call to annotate | +| ActiveRecord.rb:78:13:78:77 | call to annotate | potentiallyUnsafeSqlExecutingMethodCall | ActiveRecord.rb:9:5:9:68 | call to find | | ActiveRecord.rb:19:5:19:25 | call to destroy_by | @@ -55,6 +58,7 @@ potentiallyUnsafeSqlExecutingMethodCall | ActiveRecord.rb:46:5:46:33 | call to delete_by | | ActiveRecord.rb:52:5:52:29 | call to order | | ActiveRecord.rb:56:7:56:40 | call to find_by | +| ActiveRecord.rb:78:13:78:77 | call to annotate | activeRecordModelInstantiations | ActiveRecord.rb:9:5:9:68 | call to find | ActiveRecord.rb:5:1:15:3 | User | | ActiveRecord.rb:13:5:13:40 | call to find_by | ActiveRecord.rb:1:1:3:3 | UserGroup | diff --git a/ruby/ql/test/library-tests/frameworks/ActiveRecord.rb b/ruby/ql/test/library-tests/frameworks/ActiveRecord.rb index 5045a5c2264..d25cbf901c3 100644 --- a/ruby/ql/test/library-tests/frameworks/ActiveRecord.rb +++ b/ruby/ql/test/library-tests/frameworks/ActiveRecord.rb @@ -68,3 +68,13 @@ class BazController < BarController Admin.delete_by(params[:admin_condition]) end end + +class AnnotatedController < ActionController::Base + def index + users = User.annotate("this is a safe annotation") + end + + def unsafe_action + users = User.annotate("this is an unsafe annotation:#{params[:comment]}") + end +end diff --git a/ruby/ql/test/query-tests/security/cwe-089/ActiveRecordInjection.rb b/ruby/ql/test/query-tests/security/cwe-089/ActiveRecordInjection.rb index 9c314a82b34..4c8dfcca10d 100644 --- a/ruby/ql/test/query-tests/security/cwe-089/ActiveRecordInjection.rb +++ b/ruby/ql/test/query-tests/security/cwe-089/ActiveRecordInjection.rb @@ -137,3 +137,17 @@ class BazController < BarController Admin.delete_by(params[:admin_condition]) end end + +class AnnotatedController < ActionController::Base + def index + name = params[:user_name] + # GOOD: string literal arguments not controlled by user are safe for annotations + users = User.annotate("this is a safe annotation").find_by(user_name: name) + end + + def unsafe_action + name = params[:user_name] + # BAD: user input passed into annotations are vulnerable to SQLi + users = User.annotate("this is an unsafe annotation:#{params[:comment]}").find_by(user_name: name) + end +end diff --git a/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected b/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected index 6a9f5f771fb..dc814977cae 100644 --- a/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected +++ b/ruby/ql/test/query-tests/security/cwe-089/SqlInjection.expected @@ -31,6 +31,8 @@ edges | ActiveRecordInjection.rb:99:11:99:17 | ...[...] : | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | | ActiveRecordInjection.rb:137:21:137:26 | call to params : | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | ActiveRecordInjection.rb:20:22:20:30 | condition : | +| ActiveRecordInjection.rb:151:59:151:64 | call to params : | ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | +| ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | nodes | ActiveRecordInjection.rb:8:25:8:28 | name : | semmle.label | name : | | ActiveRecordInjection.rb:8:31:8:34 | pass : | semmle.label | pass : | @@ -80,6 +82,9 @@ nodes | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | semmle.label | ... + ... | | ActiveRecordInjection.rb:137:21:137:26 | call to params : | semmle.label | call to params : | | ActiveRecordInjection.rb:137:21:137:44 | ...[...] : | semmle.label | ...[...] : | +| ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | semmle.label | "this is an unsafe annotation:..." | +| ActiveRecordInjection.rb:151:59:151:64 | call to params : | semmle.label | call to params : | +| ActiveRecordInjection.rb:151:59:151:74 | ...[...] : | semmle.label | ...[...] : | subpaths #select | ActiveRecordInjection.rb:10:33:10:67 | "name='#{...}' and pass='#{...}'" | ActiveRecordInjection.rb:70:23:70:28 | call to params : | ActiveRecordInjection.rb:10:33:10:67 | "name='#{...}' and pass='#{...}'" | This SQL query depends on $@. | ActiveRecordInjection.rb:70:23:70:28 | call to params | a user-provided value | @@ -99,3 +104,4 @@ subpaths | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | ActiveRecordInjection.rb:88:18:88:23 | call to params : | ActiveRecordInjection.rb:88:18:88:35 | ...[...] | This SQL query depends on $@. | ActiveRecordInjection.rb:88:18:88:23 | call to params | a user-provided value | | ActiveRecordInjection.rb:92:21:92:35 | ...[...] | ActiveRecordInjection.rb:92:21:92:26 | call to params : | ActiveRecordInjection.rb:92:21:92:35 | ...[...] | This SQL query depends on $@. | ActiveRecordInjection.rb:92:21:92:26 | call to params | a user-provided value | | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | ActiveRecordInjection.rb:98:10:98:15 | call to params : | ActiveRecordInjection.rb:104:20:104:32 | ... + ... | This SQL query depends on $@. | ActiveRecordInjection.rb:98:10:98:15 | call to params | a user-provided value | +| ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | ActiveRecordInjection.rb:151:59:151:64 | call to params : | ActiveRecordInjection.rb:151:27:151:76 | "this is an unsafe annotation:..." | This SQL query depends on $@. | ActiveRecordInjection.rb:151:59:151:64 | call to params | a user-provided value | diff --git a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql index 398c48f01e5..4e78d56f7d2 100644 --- a/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql +++ b/swift/ql/src/queries/Security/CWE-135/StringLengthConflation.ql @@ -41,88 +41,80 @@ class StringLengthConflationConfiguration extends DataFlow::Configuration { } override predicate isSink(DataFlow::Node node, string flowstate) { - // arguments to method calls... exists( - string className, string methodName, string paramName, ClassDecl c, AbstractFunctionDecl f, - CallExpr call, int arg + AbstractFunctionDecl funcDecl, CallExpr call, string funcName, string paramName, int arg | ( - // `NSRange.init` - className = "NSRange" and - methodName = "init(location:length:)" and - paramName = ["location", "length"] + // arguments to method calls... + exists(string className, ClassDecl c | + ( + // `NSRange.init` + className = "NSRange" and + funcName = "init(location:length:)" and + paramName = ["location", "length"] + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + funcName = "character(at:)" and + paramName = "at" + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + funcName = "substring(from:)" and + paramName = "from" + or + // `NSString.character` + className = ["NSString", "NSMutableString"] and + funcName = "substring(to:)" and + paramName = "to" + or + // `NSMutableString.insert` + className = "NSMutableString" and + funcName = "insert(_:at:)" and + paramName = "at" + ) and + c.getName() = className and + c.getAMember() = funcDecl and + call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and + flowstate = "String" // `String` length flowing into `NSString` + ) or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - methodName = "character(at:)" and - paramName = "at" + // arguments to function calls... + // `NSMakeRange` + funcName = "NSMakeRange(_:_:)" and + paramName = ["loc", "len"] and + call.getStaticTarget() = funcDecl and + flowstate = "String" // `String` length flowing into `NSString` or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - methodName = "substring(from:)" and - paramName = "from" - or - // `NSString.character` - className = ["NSString", "NSMutableString"] and - methodName = "substring(to:)" and - paramName = "to" - or - // `NSMutableString.insert` - className = "NSMutableString" and - methodName = "insert(_:at:)" and - paramName = "at" + // arguments to method calls... + ( + // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` + funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and + paramName = "k" + or + // `String.prefix`, `String.suffix` + funcName = ["prefix(_:)", "suffix(_:)"] and + paramName = "maxLength" + or + // `String.Index.init` + funcName = "init(encodedOffset:)" and + paramName = "offset" + or + // `String.index` + funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and + paramName = "n" + or + // `String.formIndex` + funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and + paramName = "distance" + ) and + call.getFunction().(ApplyExpr).getStaticTarget() = funcDecl and + flowstate = "NSString" // `NSString` length flowing into `String` ) and - c.getName() = className and - c.getAMember() = f and // TODO: will this even work if its defined in a parent class? - call.getFunction().(ApplyExpr).getStaticTarget() = f and - f.getName() = methodName and - f.getParam(pragma[only_bind_into](arg)).getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "String" // `String` length flowing into `NSString` - ) - or - // arguments to function calls... - exists(string funcName, string paramName, CallExpr call, int arg | - // `NSMakeRange` - funcName = "NSMakeRange(_:_:)" and - paramName = ["loc", "len"] and - call.getStaticTarget().getName() = funcName and - call.getStaticTarget().getParam(pragma[only_bind_into](arg)).getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "String" // `String` length flowing into `NSString` - ) - or - // arguments to function calls... - exists(string funcName, string paramName, CallExpr call, int arg | - ( - // `String.dropFirst`, `String.dropLast`, `String.removeFirst`, `String.removeLast` - funcName = ["dropFirst(_:)", "dropLast(_:)", "removeFirst(_:)", "removeLast(_:)"] and - paramName = "k" - or - // `String.prefix`, `String.suffix` - funcName = ["prefix(_:)", "suffix(_:)"] and - paramName = "maxLength" - or - // `String.Index.init` - funcName = "init(encodedOffset:)" and - paramName = "offset" - or - // `String.index` - funcName = ["index(_:offsetBy:)", "index(_:offsetBy:limitBy:)"] and - paramName = "n" - or - // `String.formIndex` - funcName = ["formIndex(_:offsetBy:)", "formIndex(_:offsetBy:limitBy:)"] and - paramName = "distance" - ) and - call.getFunction().(ApplyExpr).getStaticTarget().getName() = funcName and - call.getFunction() - .(ApplyExpr) - .getStaticTarget() - .getParam(pragma[only_bind_into](arg)) - .getName() = paramName and - call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() and - flowstate = "NSString" // `NSString` length flowing into `String` + // match up `funcName`, `paramName`, `arg`, `node`. + funcDecl.getName() = funcName and + funcDecl.getParam(pragma[only_bind_into](arg)).getName() = paramName and + call.getArgument(pragma[only_bind_into](arg)).getExpr() = node.asExpr() ) } diff --git a/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation.expected b/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation.expected index 1af416dad2f..e5191b6446d 100644 --- a/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation.expected +++ b/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation.expected @@ -1,4 +1,5 @@ edges +| StringLengthConflation2.swift:37:34:37:36 | .count : | StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | | StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | | StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | | StringLengthConflation.swift:93:28:93:31 | .length : | StringLengthConflation.swift:93:28:93:40 | ... call to -(_:_:) ... | @@ -15,6 +16,8 @@ edges | StringLengthConflation.swift:135:36:135:38 | .count : | StringLengthConflation.swift:135:36:135:46 | ... call to -(_:_:) ... | | StringLengthConflation.swift:141:28:141:30 | .count : | StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... | nodes +| StringLengthConflation2.swift:37:34:37:36 | .count : | semmle.label | .count : | +| StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... | | StringLengthConflation.swift:53:43:53:46 | .length | semmle.label | .length | | StringLengthConflation.swift:60:47:60:50 | .length : | semmle.label | .length : | | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | semmle.label | ... call to /(_:_:) ... | @@ -50,6 +53,7 @@ nodes | StringLengthConflation.swift:141:28:141:38 | ... call to -(_:_:) ... | semmle.label | ... call to -(_:_:) ... | subpaths #select +| StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | StringLengthConflation2.swift:37:34:37:36 | .count : | StringLengthConflation2.swift:37:34:37:44 | ... call to -(_:_:) ... | This String length is used in an NSString, but it may not be equivalent. | | StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | StringLengthConflation.swift:53:43:53:46 | .length | This NSString length is used in a String, but it may not be equivalent. | | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | StringLengthConflation.swift:60:47:60:50 | .length : | StringLengthConflation.swift:60:47:60:59 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. | | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | StringLengthConflation.swift:66:33:66:36 | .length : | StringLengthConflation.swift:66:33:66:45 | ... call to /(_:_:) ... | This NSString length is used in a String, but it may not be equivalent. | diff --git a/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation2.swift b/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation2.swift new file mode 100644 index 00000000000..39b1456e604 --- /dev/null +++ b/swift/ql/test/query-tests/Security/CWE-135/StringLengthConflation2.swift @@ -0,0 +1,42 @@ +// this test is in a separate file, because we want to test with a slightly different library +// implementation. In this version, some of the functions of `NSString` are in fact implemented +// in a base class `NSStringBase`. + +// --- stubs --- + +func print(_ items: Any...) {} + +typealias unichar = UInt16 + +class NSObject +{ +} + +class NSStringBase : NSObject +{ + func substring(from: Int) -> String { return "" } +} + +class NSString : NSStringBase +{ + init(string: String) { length = string.count } + + func substring(to: Int) -> String { return "" } + + private(set) var length: Int +} + +// --- tests --- + +func test(s: String) { + let ns = NSString(string: s) + + let nstr1 = ns.substring(from: ns.length - 1) // GOOD + let nstr2 = ns.substring(from: s.count - 1) // BAD: String length used in NSString [NOT DETECTED] + let nstr3 = ns.substring(to: ns.length - 1) // GOOD + let nstr4 = ns.substring(to: s.count - 1) // BAD: String length used in NSString + print("substrings '\(nstr1)' '\(nstr2)' / '\(nstr3)' '\(nstr4)'") +} + +// `begin :thumbsup: end`, with thumbs up emoji and skin tone modifier +test(s: "begin \u{0001F44D}\u{0001F3FF} end")