Merge pull request #10855 from erik-krogh/formatTaint

Ruby: taint-steps for printf calls - and add a `AdditionalTaintStep` class
This commit is contained in:
Erik Krogh Kristensen
2022-11-16 12:08:45 +01:00
committed by GitHub
10 changed files with 200 additions and 71 deletions

View File

@@ -23,3 +23,4 @@ private import codeql.ruby.frameworks.HttpClients
private import codeql.ruby.frameworks.XmlParsing
private import codeql.ruby.frameworks.ActionDispatch
private import codeql.ruby.frameworks.PosixSpawn
private import codeql.ruby.frameworks.StringFormatters

View File

@@ -0,0 +1,30 @@
/**
* Provides classes representing various flow steps for taint tracking.
*/
private import codeql.ruby.DataFlow
private import internal.DataFlowPrivate as DFPrivate
private class Unit = DFPrivate::Unit;
/**
* A module importing the frameworks that implement additional flow steps,
* ensuring that they are visible to the taint tracking library.
*/
private module Frameworks {
import codeql.ruby.frameworks.StringFormatters
}
/**
* A unit class for adding additional taint steps.
*
* Extend this class to add additional taint steps that should apply to all
* taint configurations.
*/
class AdditionalTaintStep extends Unit {
/**
* Holds if the step from `node1` to `node2` should be considered a taint
* step for all configurations.
*/
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
}

View File

@@ -62,6 +62,8 @@ private CfgNodes::ExprNodes::VariableWriteAccessCfgNode variablesInPattern(
cached
private module Cached {
private import codeql.ruby.dataflow.FlowSteps as FlowSteps
cached
predicate forceCachingInSameStage() { any() }
@@ -93,12 +95,10 @@ private module Cached {
)
)
or
// string interpolation of `nodeFrom` into `nodeTo`
nodeFrom.asExpr() =
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
or
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
or
any(FlowSteps::AdditionalTaintStep s).step(nodeFrom, nodeTo)
or
// Although flow through collections is modeled precisely using stores/reads, we still
// allow flow out of a _tainted_ collection. This is needed in order to support taint-
// tracking configurations where the source is a collection.

View File

@@ -0,0 +1,135 @@
/**
* Provides classes for modeling string formatting libraries.
*/
private import codeql.ruby.AST as Ast
private import codeql.ruby.DataFlow
private import codeql.ruby.ApiGraphs
private import codeql.ruby.frameworks.core.IO
/**
* A call to `printf` or `sprintf`.
*/
abstract class PrintfStyleCall extends DataFlow::CallNode {
// We assume that most printf-like calls have the signature f(format_string, args...)
/**
* Gets the format string of this call.
*/
DataFlow::Node getFormatString() { result = this.getArgument(0) }
/**
* Gets then `n`th formatted argument of this call.
*/
DataFlow::Node getFormatArgument(int n) { n >= 0 and result = this.getArgument(n + 1) }
/** Holds if this call returns the formatted string. */
predicate returnsFormatted() { any() }
}
/**
* A call to `Kernel.printf`.
*/
class KernelPrintfCall extends PrintfStyleCall {
KernelPrintfCall() {
this = API::getTopLevelMember("Kernel").getAMethodCall("printf")
or
this.asExpr().getExpr() instanceof Ast::UnknownMethodCall and
this.getMethodName() = "printf"
}
// Kernel#printf supports two signatures:
// printf(io, string, ...)
// printf(string, ...)
override DataFlow::Node getFormatString() {
// Because `printf` has two different signatures, we can't be sure which
// argument is the format string, so we use a heuristic:
// If the first argument has a string value, then we assume it is the format string.
// Otherwise we treat both the first and second args as the format string.
if this.getArgument(0).getExprNode().getConstantValue().isString(_)
then result = this.getArgument(0)
else result = this.getArgument([0, 1])
}
override predicate returnsFormatted() { none() }
}
/**
* A call to `Kernel.sprintf`.
*/
class KernelSprintfCall extends PrintfStyleCall {
KernelSprintfCall() {
this = API::getTopLevelMember("Kernel").getAMethodCall("sprintf")
or
this.asExpr().getExpr() instanceof Ast::UnknownMethodCall and
this.getMethodName() = "sprintf"
}
override predicate returnsFormatted() { any() }
}
/**
* A call to `IO#printf`.
*/
class IOPrintfCall extends PrintfStyleCall {
IOPrintfCall() {
this.getReceiver() instanceof IO::IOInstance and this.getMethodName() = "printf"
}
override predicate returnsFormatted() { none() }
}
/**
* A call to `String#%`.
*/
class StringPercentCall extends PrintfStyleCall {
StringPercentCall() { this.getMethodName() = "%" }
override DataFlow::Node getFormatString() { result = this.getReceiver() }
override DataFlow::Node getFormatArgument(int n) {
exists(CfgNodes::ExprNodes::ArrayLiteralCfgNode arrLit | arrLit = this.getArgument(0).asExpr() |
result.asExpr() = arrLit.getArgument(n)
)
or
exists(CfgNodes::ExprNodes::HashLiteralCfgNode hashLit |
hashLit = this.getArgument(0).asExpr()
|
n = -2 and // -2 is indicates that the index does not make sense in this context
result.asExpr() = hashLit.getAKeyValuePair().getValue()
)
}
override predicate returnsFormatted() { any() }
}
private import codeql.ruby.dataflow.FlowSteps
private import codeql.ruby.CFG
/**
* A step for string interpolation of `pred` into `succ`.
* E.g.
* ```rb
* succ = "foo #{pred} bar"
* ```
*/
private class StringLiteralFormatStep extends AdditionalTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
pred.asExpr() = succ.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
}
}
/**
* A taint propagating data flow edge arising from string formatting.
*/
private class StringFormattingTaintStep extends AdditionalTaintStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(PrintfStyleCall call |
call.returnsFormatted() and
succ = call
|
pred = call.getFormatString()
or
pred = call.getFormatArgument(_)
)
}
}

View File

@@ -2,73 +2,8 @@
* Provides Ruby-specific imports and classes needed for `TaintedFormatStringQuery` and `TaintedFormatStringCustomizations`.
*/
import codeql.ruby.AST
import codeql.ruby.frameworks.StringFormatters
import codeql.ruby.DataFlow
import codeql.ruby.dataflow.RemoteFlowSources
import codeql.ruby.ApiGraphs
import codeql.ruby.TaintTracking
private import codeql.ruby.frameworks.Files
private import codeql.ruby.frameworks.core.IO
private import codeql.ruby.controlflow.CfgNodes
/**
* A call to `printf` or `sprintf`.
*/
abstract class PrintfStyleCall extends DataFlow::CallNode {
// We assume that most printf-like calls have the signature f(format_string, args...)
/**
* Gets the format string of this call.
*/
DataFlow::Node getFormatString() { result = this.getArgument(0) }
/**
* Gets then `n`th formatted argument of this call.
*/
DataFlow::Node getFormatArgument(int n) { n >= 0 and result = this.getArgument(n + 1) }
}
/**
* A call to `Kernel.printf`.
*/
class KernelPrintfCall extends PrintfStyleCall {
KernelPrintfCall() {
this = API::getTopLevelMember("Kernel").getAMethodCall("printf")
or
this.asExpr().getExpr() instanceof UnknownMethodCall and
this.getMethodName() = "printf"
}
// Kernel#printf supports two signatures:
// printf(io, string, ...)
// printf(string, ...)
override DataFlow::Node getFormatString() {
// Because `printf` has two different signatures, we can't be sure which
// argument is the format string, so we use a heuristic:
// If the first argument has a string value, then we assume it is the format string.
// Otherwise we treat both the first and second args as the format string.
if this.getArgument(0).getExprNode().getConstantValue().isString(_)
then result = this.getArgument(0)
else result = this.getArgument([0, 1])
}
}
/**
* A call to `Kernel.sprintf`.
*/
class KernelSprintfCall extends PrintfStyleCall {
KernelSprintfCall() {
this = API::getTopLevelMember("Kernel").getAMethodCall("sprintf")
or
this.asExpr().getExpr() instanceof UnknownMethodCall and
this.getMethodName() = "sprintf"
}
}
/**
* A call to `IO#printf`.
*/
class IOPrintfCall extends PrintfStyleCall {
IOPrintfCall() {
this.getReceiver() instanceof IO::IOInstance and this.getMethodName() = "printf"
}
}
import codeql.ruby.DataFlow

View File

@@ -12,6 +12,7 @@ edges
| string_flow.rb:10:29:10:29 | b : | string_flow.rb:10:10:10:30 | call to try_convert |
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:15:10:15:17 | ... % ... |
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:15:17:15:17 | a : |
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:16:10:16:29 | ... % ... |
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:16:28:16:28 | a : |
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:17:10:17:10 | a : |
| string_flow.rb:14:9:14:18 | call to source : | string_flow.rb:17:10:17:18 | ... % ... |

View File

@@ -11,6 +11,7 @@ edges
| app/views/foo/stores/show.html.erb:41:64:41:87 | ... + ... : | app/views/foo/bars/_widget.html.erb:5:9:5:20 | call to display_text |
| app/views/foo/stores/show.html.erb:41:64:41:87 | ... + ... : | app/views/foo/bars/_widget.html.erb:8:9:8:36 | ...[...] |
| app/views/foo/stores/show.html.erb:41:76:41:87 | call to display_text : | app/views/foo/stores/show.html.erb:41:64:41:87 | ... + ... : |
| app/views/foo/stores/show.html.erb:87:17:87:28 | call to handle : | app/views/foo/stores/show.html.erb:87:3:87:29 | call to sprintf |
nodes
| app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read : | semmle.label | call to read : |
| app/controllers/foo/stores_controller.rb:9:22:9:23 | dt : | semmle.label | dt : |
@@ -31,6 +32,8 @@ nodes
| app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | semmle.label | call to raw_name |
| app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | semmle.label | call to display_name |
| app/views/foo/stores/show.html.erb:83:5:83:24 | @other_user_raw_name | semmle.label | @other_user_raw_name |
| app/views/foo/stores/show.html.erb:87:3:87:29 | call to sprintf | semmle.label | call to sprintf |
| app/views/foo/stores/show.html.erb:87:17:87:28 | call to handle : | semmle.label | call to handle : |
subpaths
#select
| app/views/foo/bars/_widget.html.erb:5:9:5:20 | call to display_text | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read : | app/views/foo/bars/_widget.html.erb:5:9:5:20 | call to display_text | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:8:10:8:29 | call to read | stored value |
@@ -46,3 +49,4 @@ subpaths
| app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:70:3:70:20 | call to raw_name | stored value |
| app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:80:5:80:22 | call to display_name | stored value |
| app/views/foo/stores/show.html.erb:83:5:83:24 | @other_user_raw_name | app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name : | app/views/foo/stores/show.html.erb:83:5:83:24 | @other_user_raw_name | Stored cross-site scripting vulnerability due to $@. | app/controllers/foo/stores_controller.rb:12:28:12:48 | call to raw_name | stored value |
| app/views/foo/stores/show.html.erb:87:3:87:29 | call to sprintf | app/views/foo/stores/show.html.erb:87:17:87:28 | call to handle : | app/views/foo/stores/show.html.erb:87:3:87:29 | call to sprintf | Stored cross-site scripting vulnerability due to $@. | app/views/foo/stores/show.html.erb:87:17:87:28 | call to handle | stored value |

View File

@@ -82,5 +82,10 @@
<%# BAD: Indirect to a database value without escaping %>
<%= @other_user_raw_name.html_safe %>
<%# BAD: Kernel.sprintf is a taint-step %>
<%=
sprintf("%s", @user.handle).html_safe
%>
<%# GOOD: The `foo.bar.baz` is not recognized as a source %>
<%= @other_user_raw_name.foo.bar.baz.html_safe %>

View File

@@ -12,6 +12,10 @@ edges
| tainted_format_string.rb:33:32:33:46 | ...[...] : | tainted_format_string.rb:33:12:33:46 | ... + ... |
| tainted_format_string.rb:36:30:36:35 | call to params : | tainted_format_string.rb:36:30:36:44 | ...[...] : |
| tainted_format_string.rb:36:30:36:44 | ...[...] : | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" |
| tainted_format_string.rb:39:22:39:27 | call to params : | tainted_format_string.rb:39:22:39:36 | ...[...] : |
| tainted_format_string.rb:39:22:39:36 | ...[...] : | tainted_format_string.rb:39:5:39:45 | "A log message #{...} %{foo}" |
| tainted_format_string.rb:42:22:42:27 | call to params : | tainted_format_string.rb:42:22:42:36 | ...[...] : |
| tainted_format_string.rb:42:22:42:36 | ...[...] : | tainted_format_string.rb:42:5:42:43 | "A log message #{...} %08x" |
nodes
| tainted_format_string.rb:4:12:4:17 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:4:12:4:26 | ...[...] | semmle.label | ...[...] |
@@ -37,6 +41,12 @@ nodes
| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | semmle.label | "A log message: #{...}" |
| tainted_format_string.rb:36:30:36:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:36:30:36:44 | ...[...] : | semmle.label | ...[...] : |
| tainted_format_string.rb:39:5:39:45 | "A log message #{...} %{foo}" | semmle.label | "A log message #{...} %{foo}" |
| tainted_format_string.rb:39:22:39:27 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:39:22:39:36 | ...[...] : | semmle.label | ...[...] : |
| tainted_format_string.rb:42:5:42:43 | "A log message #{...} %08x" | semmle.label | "A log message #{...} %08x" |
| tainted_format_string.rb:42:22:42:27 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:42:22:42:36 | ...[...] : | semmle.label | ...[...] : |
subpaths
#select
| tainted_format_string.rb:4:12:4:26 | ...[...] | tainted_format_string.rb:4:12:4:17 | call to params : | tainted_format_string.rb:4:12:4:26 | ...[...] | Format string depends on a $@. | tainted_format_string.rb:4:12:4:17 | call to params | user-provided value |
@@ -50,3 +60,5 @@ subpaths
| tainted_format_string.rb:28:19:28:33 | ...[...] | tainted_format_string.rb:28:19:28:24 | call to params : | tainted_format_string.rb:28:19:28:33 | ...[...] | Format string depends on a $@. | tainted_format_string.rb:28:19:28:24 | call to params | user-provided value |
| tainted_format_string.rb:33:12:33:46 | ... + ... | tainted_format_string.rb:33:32:33:37 | call to params : | tainted_format_string.rb:33:12:33:46 | ... + ... | Format string depends on a $@. | tainted_format_string.rb:33:32:33:37 | call to params | user-provided value |
| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | tainted_format_string.rb:36:30:36:35 | call to params : | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | Format string depends on a $@. | tainted_format_string.rb:36:30:36:35 | call to params | user-provided value |
| tainted_format_string.rb:39:5:39:45 | "A log message #{...} %{foo}" | tainted_format_string.rb:39:22:39:27 | call to params : | tainted_format_string.rb:39:5:39:45 | "A log message #{...} %{foo}" | Format string depends on a $@. | tainted_format_string.rb:39:22:39:27 | call to params | user-provided value |
| tainted_format_string.rb:42:5:42:43 | "A log message #{...} %08x" | tainted_format_string.rb:42:22:42:27 | call to params : | tainted_format_string.rb:42:5:42:43 | "A log message #{...} %08x" | Format string depends on a $@. | tainted_format_string.rb:42:22:42:27 | call to params | user-provided value |

View File

@@ -34,5 +34,11 @@ class UsersController < ActionController::Base
# Taint via string interpolation
printf("A log message: #{params[:format]}", arg) # BAD
# Using String#
"A log message #{params[:format]} %{foo}" % {foo: "foo"} # BAD
# String# with an array
"A log message #{params[:format]} %08x" % ["foo"] # BAD
end
end