Merge pull request #8272 from hmac/hmac/tainted-format-string

This commit is contained in:
Harry Maclean
2022-03-22 08:37:47 +13:00
committed by GitHub
16 changed files with 345 additions and 10 deletions

View File

@@ -0,0 +1,43 @@
/**
* Provides default sources, sinks and sanitizers for reasoning about
* format injections, as well as extension points for adding your own.
*/
/**
* Provides default sources, sinks and sanitizers for reasoning about
* format injections, as well as extension points for adding your own.
*/
module TaintedFormatString {
import TaintedFormatStringSpecific
/**
* A data flow source for format injections.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for format injections.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for format injections.
*/
abstract class Sanitizer extends DataFlow::Node { }
/** A source of remote user input, considered as a flow source for format injection. */
class RemoteSource extends Source instanceof RemoteFlowSource { }
/**
* A format argument to a printf-like function, considered as a flow sink for format injection.
*/
class FormatSink extends Sink {
FormatSink() {
exists(PrintfStyleCall printf |
this = printf.getFormatString() and
// exclude trivial case where there are no arguments to interpolate
exists(printf.getFormatArgument(_))
)
}
}
}

View File

@@ -0,0 +1,27 @@
/**
* Provides a taint-tracking configuration for reasoning about format
* injections.
*
*
* Note, for performance reasons: only import this file if
* `TaintedFormatString::Configuration` is needed, otherwise
* `TaintedFormatStringCustomizations` should be imported instead.
*/
private import TaintedFormatStringCustomizations::TaintedFormatString
/**
* A taint-tracking configuration for format injections.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "TaintedFormatString" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
super.isSanitizer(node) or
node instanceof Sanitizer
}
}

View File

@@ -0,0 +1,71 @@
/**
* Provides Ruby-specific imports and classes needed for `TaintedFormatStringQuery` and `TaintedFormatStringCustomizations`.
*/
import ruby
import codeql.ruby.DataFlow
import codeql.ruby.dataflow.RemoteFlowSources
import codeql.ruby.ApiGraphs
import codeql.ruby.TaintTracking
private import codeql.ruby.frameworks.Files::IO
private import codeql.ruby.controlflow.CfgNodes
/**
* A call to `printf` or `sprintf`.
*/
abstract class PrintfStyleCall extends DataFlow::CallNode {
// We assume that most printf-like calls have the signature f(format_string, args...)
/**
* Gets the format string of this call.
*/
DataFlow::Node getFormatString() { result = this.getArgument(0) }
/**
* Gets then `n`th formatted argument of this call.
*/
DataFlow::Node getFormatArgument(int n) { n >= 0 and result = this.getArgument(n + 1) }
}
/**
* A call to `Kernel.printf`.
*/
class KernelPrintfCall extends PrintfStyleCall {
KernelPrintfCall() {
this = API::getTopLevelMember("Kernel").getAMethodCall("printf")
or
this.asExpr().getExpr() instanceof UnknownMethodCall and
this.getMethodName() = "printf"
}
// Kernel#printf supports two signatures:
// printf(io, string, ...)
// printf(string, ...)
override DataFlow::Node getFormatString() {
// Because `printf` has two different signatures, we can't be sure which
// argument is the format string, so we use a heuristic:
// If the first argument has a string value, then we assume it is the format string.
// Otherwise we treat both the first and second args as the format string.
if this.getArgument(0).getExprNode().getConstantValue().isString(_)
then result = this.getArgument(0)
else result = this.getArgument([0, 1])
}
}
/**
* A call to `Kernel.sprintf`.
*/
class KernelSprintfCall extends PrintfStyleCall {
KernelSprintfCall() {
this = API::getTopLevelMember("Kernel").getAMethodCall("sprintf")
or
this.asExpr().getExpr() instanceof UnknownMethodCall and
this.getMethodName() = "sprintf"
}
}
/**
* A call to `IO#printf`.
*/
class IOPrintfCall extends PrintfStyleCall {
IOPrintfCall() { this.getReceiver() instanceof IOInstance and this.getMethodName() = "printf" }
}

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `rb/http-tainted-format-string`. The query finds cases where data from remote user input is used in a string formatting method in a way that allows arbitrary format specifiers to be inserted.

View File

@@ -0,0 +1,50 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Methods like <code>Kernel.printf</code> accept a format string that is used to format
the remaining arguments by providing inline format specifiers. If the format string
contains unsanitized input from an untrusted source, then that string may contain
unexpected format specifiers that cause garbled output or throw an exception.
</p>
</overview>
<recommendation>
<p>
Either sanitize the input before including it in the format string, or use a
<code>%s</code> specifier in the format string, and pass the untrusted data as corresponding
argument.
</p>
</recommendation>
<example>
<p>
The following program snippet logs information about an unauthorized access attempt. The
log message includes the user name, and the user's IP address is passed as an additional
argument to <code>Kernel.printf</code> to be appended to the message:
</p>
<sample src="examples/tainted_format_string_bad.rb"/>
<p>
However, if a malicious user provides a format specified such as <code>%s</code>
as their user name, <code>Kernel.printf</code> will throw an exception as there
are too few arguments to satisfy the format. This can result in denial of
service or leaking of internal information to the attacker via a stack trace.
</p>
<p>
Instead, the user name should be included using the <code>%s</code> specifier:
</p>
<sample src="examples/tainted_format_string_good.rb"/>
<p>
Alternatively, string interpolation should be used exclusively:
</p>
<sample src="examples/tainted_format_string_interpolation.rb"/>
</example>
<references>
<li>Ruby documentation for <a href="https://docs.ruby-lang.org/en/3.1/Kernel.html#method-i-sprintf">format strings</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,21 @@
/**
* @name Use of externally-controlled format string
* @description Using external input in format strings can lead to garbled output.
* @kind path-problem
* @problem.severity warning
* @security-severity 7.3
* @precision high
* @id rb/tainted-format-string
* @tags security
* external/cwe/cwe-134
*/
import ruby
import codeql.ruby.DataFlow
import codeql.ruby.security.TaintedFormatStringQuery
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "$@ flows here and is used in a format string.",
source.getNode(), "User-provided value"

View File

@@ -0,0 +1,5 @@
class UsersController < ActionController::Base
def index
printf("Unauthorised access attempt by #{params[:user]}: %s", request.ip)
end
end

View File

@@ -0,0 +1,5 @@
class UsersController < ActionController::Base
def index
printf("Unauthorised access attempt by %s: %s", params[:user], request.ip)
end
end

View File

@@ -0,0 +1,5 @@
class UsersController < ActionController::Base
def index
puts "Unauthorised access attempt by #{params[:user]}: #{request.ip}"
end
end

View File

@@ -0,0 +1,52 @@
edges
| tainted_format_string.rb:4:12:4:17 | call to params : | tainted_format_string.rb:4:12:4:26 | ...[...] |
| tainted_format_string.rb:5:19:5:24 | call to params : | tainted_format_string.rb:5:19:5:33 | ...[...] |
| tainted_format_string.rb:10:23:10:28 | call to params : | tainted_format_string.rb:10:23:10:37 | ...[...] |
| tainted_format_string.rb:11:30:11:35 | call to params : | tainted_format_string.rb:11:30:11:44 | ...[...] |
| tainted_format_string.rb:18:23:18:28 | call to params : | tainted_format_string.rb:18:23:18:37 | ...[...] |
| tainted_format_string.rb:19:30:19:35 | call to params : | tainted_format_string.rb:19:30:19:44 | ...[...] |
| tainted_format_string.rb:21:27:21:32 | call to params : | tainted_format_string.rb:21:27:21:41 | ...[...] |
| tainted_format_string.rb:22:20:22:25 | call to params : | tainted_format_string.rb:22:20:22:34 | ...[...] |
| tainted_format_string.rb:28:19:28:24 | call to params : | tainted_format_string.rb:28:19:28:33 | ...[...] |
| tainted_format_string.rb:33:32:33:37 | call to params : | tainted_format_string.rb:33:32:33:46 | ...[...] : |
| tainted_format_string.rb:33:32:33:46 | ...[...] : | tainted_format_string.rb:33:12:33:46 | ... + ... |
| tainted_format_string.rb:36:30:36:35 | call to params : | tainted_format_string.rb:36:30:36:44 | ...[...] : |
| tainted_format_string.rb:36:30:36:44 | ...[...] : | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" |
nodes
| tainted_format_string.rb:4:12:4:17 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:4:12:4:26 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:5:19:5:24 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:5:19:5:33 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:10:23:10:28 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:10:23:10:37 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:11:30:11:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:11:30:11:44 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:18:23:18:28 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:18:23:18:37 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:19:30:19:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:19:30:19:44 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:21:27:21:32 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:21:27:21:41 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:22:20:22:25 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:22:20:22:34 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:28:19:28:24 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:28:19:28:33 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:33:12:33:46 | ... + ... | semmle.label | ... + ... |
| tainted_format_string.rb:33:32:33:37 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:33:32:33:46 | ...[...] : | semmle.label | ...[...] : |
| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | semmle.label | "A log message: #{...}" |
| tainted_format_string.rb:36:30:36:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:36:30:36:44 | ...[...] : | semmle.label | ...[...] : |
subpaths
#select
| tainted_format_string.rb:4:12:4:26 | ...[...] | tainted_format_string.rb:4:12:4:17 | call to params : | tainted_format_string.rb:4:12:4:26 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:4:12:4:17 | call to params | User-provided value |
| tainted_format_string.rb:5:19:5:33 | ...[...] | tainted_format_string.rb:5:19:5:24 | call to params : | tainted_format_string.rb:5:19:5:33 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:5:19:5:24 | call to params | User-provided value |
| tainted_format_string.rb:10:23:10:37 | ...[...] | tainted_format_string.rb:10:23:10:28 | call to params : | tainted_format_string.rb:10:23:10:37 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:10:23:10:28 | call to params | User-provided value |
| tainted_format_string.rb:11:30:11:44 | ...[...] | tainted_format_string.rb:11:30:11:35 | call to params : | tainted_format_string.rb:11:30:11:44 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:11:30:11:35 | call to params | User-provided value |
| tainted_format_string.rb:18:23:18:37 | ...[...] | tainted_format_string.rb:18:23:18:28 | call to params : | tainted_format_string.rb:18:23:18:37 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:18:23:18:28 | call to params | User-provided value |
| tainted_format_string.rb:19:30:19:44 | ...[...] | tainted_format_string.rb:19:30:19:35 | call to params : | tainted_format_string.rb:19:30:19:44 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:19:30:19:35 | call to params | User-provided value |
| tainted_format_string.rb:21:27:21:41 | ...[...] | tainted_format_string.rb:21:27:21:32 | call to params : | tainted_format_string.rb:21:27:21:41 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:21:27:21:32 | call to params | User-provided value |
| tainted_format_string.rb:22:20:22:34 | ...[...] | tainted_format_string.rb:22:20:22:25 | call to params : | tainted_format_string.rb:22:20:22:34 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:22:20:22:25 | call to params | User-provided value |
| tainted_format_string.rb:28:19:28:33 | ...[...] | tainted_format_string.rb:28:19:28:24 | call to params : | tainted_format_string.rb:28:19:28:33 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:28:19:28:24 | call to params | User-provided value |
| tainted_format_string.rb:33:12:33:46 | ... + ... | tainted_format_string.rb:33:32:33:37 | call to params : | tainted_format_string.rb:33:12:33:46 | ... + ... | $@ flows here and is used in a format string. | tainted_format_string.rb:33:32:33:37 | call to params | User-provided value |
| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | tainted_format_string.rb:36:30:36:35 | call to params : | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | $@ flows here and is used in a format string. | tainted_format_string.rb:36:30:36:35 | call to params | User-provided value |

View File

@@ -0,0 +1 @@
queries/security/cwe-134/TaintedFormatString.ql

View File

@@ -0,0 +1,38 @@
class UsersController < ActionController::Base
def show
printf(params[:format], arg) # BAD
Kernel.printf(params[:format], arg) # BAD
printf(params[:format]) # GOOD
Kernel.printf(params[:format]) # GOOD
printf(IO.new(1), params[:format], arg) # BAD
Kernel.printf(IO.new(1), params[:format], arg) # BAD
printf("%s", params[:format]) # GOOD
Kernel.printf("%s", params[:format]) # GOOD
fmt = "%s"
printf(fmt, params[:format]) # GOOD
printf(IO.new(1), params[:format]) # GOOD [FALSE POSITIVE]
Kernel.printf(IO.new(1), params[:format]) # GOOD [FALSE POSITIVE]
str1 = Kernel.sprintf(params[:format], arg) # BAD
str2 = sprintf(params[:format], arg) # BAD
str1 = Kernel.sprintf(params[:format]) # GOOD
str2 = sprintf(params[:format]) # GOOD
stdout = IO.new 1
stdout.printf(params[:format], arg) # BAD
stdout.printf(params[:format]) # GOOD
# Taint via string concatenation
printf("A log message: " + params[:format], arg) # BAD
# Taint via string interpolation
printf("A log message: #{params[:format]}", arg) # BAD
end
end