Ruby: Avoid FP in TaintedFormatString query

Kernel#printf supports two call signatures:

    printf(String, *args)
    printf(IO, String, *args)

We want to identify the String argument, which is the format string.
Previously we returned both the 0th and 1st arguments, which gave some
FPs when the 1st arg was not the format string (i.e. in the
`printf(String, *args)` form, where it is just a value to be formatted).

We now rule out the trivial case: if arg 0 has a constant string value,
we assume it is the format string and return only that argument.
Otherwise we fall back to returning both arguments.

This still has some false positive potential, but less than previously.
This commit is contained in:
Harry Maclean
2022-03-21 12:45:12 +13:00
parent 5dcf0ad759
commit 5a6da827d0
3 changed files with 47 additions and 33 deletions

View File

@@ -8,6 +8,7 @@ import codeql.ruby.dataflow.RemoteFlowSources
import codeql.ruby.ApiGraphs
import codeql.ruby.TaintTracking
private import codeql.ruby.frameworks.Files::IO
private import codeql.ruby.controlflow.CfgNodes
/**
* A call to `printf` or `sprintf`.
@@ -39,7 +40,15 @@ class KernelPrintfCall extends PrintfStyleCall {
// Kernel#printf supports two signatures:
// printf(io, string, ...)
// printf(string, ...)
override DataFlow::Node getFormatString() { result = this.getArgument([0, 1]) }
override DataFlow::Node getFormatString() {
  // `printf` accepts either `printf(string, ...)` or `printf(io, string, ...)`,
  // so the position of the format string is ambiguous. Heuristic:
  //  - argument 0 is always a candidate format string;
  //  - argument 1 is a candidate only when argument 0 does not have a
  //    constant string value (when it does, argument 0 is assumed to be
  //    the format string and argument 1 an ordinary format argument).
  result = this.getArgument(0)
  or
  not this.getArgument(0).getExprNode().getConstantValue().isString(_) and
  result = this.getArgument(1)
}
}
/**

View File

@@ -3,15 +3,15 @@ edges
| tainted_format_string.rb:5:19:5:24 | call to params : | tainted_format_string.rb:5:19:5:33 | ...[...] |
| tainted_format_string.rb:10:23:10:28 | call to params : | tainted_format_string.rb:10:23:10:37 | ...[...] |
| tainted_format_string.rb:11:30:11:35 | call to params : | tainted_format_string.rb:11:30:11:44 | ...[...] |
| tainted_format_string.rb:13:23:13:28 | call to params : | tainted_format_string.rb:13:23:13:37 | ...[...] |
| tainted_format_string.rb:14:30:14:35 | call to params : | tainted_format_string.rb:14:30:14:44 | ...[...] |
| tainted_format_string.rb:16:27:16:32 | call to params : | tainted_format_string.rb:16:27:16:41 | ...[...] |
| tainted_format_string.rb:17:20:17:25 | call to params : | tainted_format_string.rb:17:20:17:34 | ...[...] |
| tainted_format_string.rb:23:19:23:24 | call to params : | tainted_format_string.rb:23:19:23:33 | ...[...] |
| tainted_format_string.rb:28:32:28:37 | call to params : | tainted_format_string.rb:28:32:28:46 | ...[...] : |
| tainted_format_string.rb:28:32:28:46 | ...[...] : | tainted_format_string.rb:28:12:28:46 | ... + ... |
| tainted_format_string.rb:31:30:31:35 | call to params : | tainted_format_string.rb:31:30:31:44 | ...[...] : |
| tainted_format_string.rb:31:30:31:44 | ...[...] : | tainted_format_string.rb:31:12:31:46 | "A log message: #{...}" |
| tainted_format_string.rb:18:23:18:28 | call to params : | tainted_format_string.rb:18:23:18:37 | ...[...] |
| tainted_format_string.rb:19:30:19:35 | call to params : | tainted_format_string.rb:19:30:19:44 | ...[...] |
| tainted_format_string.rb:21:27:21:32 | call to params : | tainted_format_string.rb:21:27:21:41 | ...[...] |
| tainted_format_string.rb:22:20:22:25 | call to params : | tainted_format_string.rb:22:20:22:34 | ...[...] |
| tainted_format_string.rb:28:19:28:24 | call to params : | tainted_format_string.rb:28:19:28:33 | ...[...] |
| tainted_format_string.rb:33:32:33:37 | call to params : | tainted_format_string.rb:33:32:33:46 | ...[...] : |
| tainted_format_string.rb:33:32:33:46 | ...[...] : | tainted_format_string.rb:33:12:33:46 | ... + ... |
| tainted_format_string.rb:36:30:36:35 | call to params : | tainted_format_string.rb:36:30:36:44 | ...[...] : |
| tainted_format_string.rb:36:30:36:44 | ...[...] : | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" |
nodes
| tainted_format_string.rb:4:12:4:17 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:4:12:4:26 | ...[...] | semmle.label | ...[...] |
@@ -21,32 +21,32 @@ nodes
| tainted_format_string.rb:10:23:10:37 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:11:30:11:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:11:30:11:44 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:13:23:13:28 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:13:23:13:37 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:14:30:14:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:14:30:14:44 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:16:27:16:32 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:16:27:16:41 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:17:20:17:25 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:17:20:17:34 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:23:19:23:24 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:23:19:23:33 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:28:12:28:46 | ... + ... | semmle.label | ... + ... |
| tainted_format_string.rb:28:32:28:37 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:28:32:28:46 | ...[...] : | semmle.label | ...[...] : |
| tainted_format_string.rb:31:12:31:46 | "A log message: #{...}" | semmle.label | "A log message: #{...}" |
| tainted_format_string.rb:31:30:31:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:31:30:31:44 | ...[...] : | semmle.label | ...[...] : |
| tainted_format_string.rb:18:23:18:28 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:18:23:18:37 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:19:30:19:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:19:30:19:44 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:21:27:21:32 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:21:27:21:41 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:22:20:22:25 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:22:20:22:34 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:28:19:28:24 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:28:19:28:33 | ...[...] | semmle.label | ...[...] |
| tainted_format_string.rb:33:12:33:46 | ... + ... | semmle.label | ... + ... |
| tainted_format_string.rb:33:32:33:37 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:33:32:33:46 | ...[...] : | semmle.label | ...[...] : |
| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | semmle.label | "A log message: #{...}" |
| tainted_format_string.rb:36:30:36:35 | call to params : | semmle.label | call to params : |
| tainted_format_string.rb:36:30:36:44 | ...[...] : | semmle.label | ...[...] : |
subpaths
#select
| tainted_format_string.rb:4:12:4:26 | ...[...] | tainted_format_string.rb:4:12:4:17 | call to params : | tainted_format_string.rb:4:12:4:26 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:4:12:4:17 | call to params | User-provided value |
| tainted_format_string.rb:5:19:5:33 | ...[...] | tainted_format_string.rb:5:19:5:24 | call to params : | tainted_format_string.rb:5:19:5:33 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:5:19:5:24 | call to params | User-provided value |
| tainted_format_string.rb:10:23:10:37 | ...[...] | tainted_format_string.rb:10:23:10:28 | call to params : | tainted_format_string.rb:10:23:10:37 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:10:23:10:28 | call to params | User-provided value |
| tainted_format_string.rb:11:30:11:44 | ...[...] | tainted_format_string.rb:11:30:11:35 | call to params : | tainted_format_string.rb:11:30:11:44 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:11:30:11:35 | call to params | User-provided value |
| tainted_format_string.rb:13:23:13:37 | ...[...] | tainted_format_string.rb:13:23:13:28 | call to params : | tainted_format_string.rb:13:23:13:37 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:13:23:13:28 | call to params | User-provided value |
| tainted_format_string.rb:14:30:14:44 | ...[...] | tainted_format_string.rb:14:30:14:35 | call to params : | tainted_format_string.rb:14:30:14:44 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:14:30:14:35 | call to params | User-provided value |
| tainted_format_string.rb:16:27:16:41 | ...[...] | tainted_format_string.rb:16:27:16:32 | call to params : | tainted_format_string.rb:16:27:16:41 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:16:27:16:32 | call to params | User-provided value |
| tainted_format_string.rb:17:20:17:34 | ...[...] | tainted_format_string.rb:17:20:17:25 | call to params : | tainted_format_string.rb:17:20:17:34 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:17:20:17:25 | call to params | User-provided value |
| tainted_format_string.rb:23:19:23:33 | ...[...] | tainted_format_string.rb:23:19:23:24 | call to params : | tainted_format_string.rb:23:19:23:33 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:23:19:23:24 | call to params | User-provided value |
| tainted_format_string.rb:28:12:28:46 | ... + ... | tainted_format_string.rb:28:32:28:37 | call to params : | tainted_format_string.rb:28:12:28:46 | ... + ... | $@ flows here and is used in a format string. | tainted_format_string.rb:28:32:28:37 | call to params | User-provided value |
| tainted_format_string.rb:31:12:31:46 | "A log message: #{...}" | tainted_format_string.rb:31:30:31:35 | call to params : | tainted_format_string.rb:31:12:31:46 | "A log message: #{...}" | $@ flows here and is used in a format string. | tainted_format_string.rb:31:30:31:35 | call to params | User-provided value |
| tainted_format_string.rb:18:23:18:37 | ...[...] | tainted_format_string.rb:18:23:18:28 | call to params : | tainted_format_string.rb:18:23:18:37 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:18:23:18:28 | call to params | User-provided value |
| tainted_format_string.rb:19:30:19:44 | ...[...] | tainted_format_string.rb:19:30:19:35 | call to params : | tainted_format_string.rb:19:30:19:44 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:19:30:19:35 | call to params | User-provided value |
| tainted_format_string.rb:21:27:21:41 | ...[...] | tainted_format_string.rb:21:27:21:32 | call to params : | tainted_format_string.rb:21:27:21:41 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:21:27:21:32 | call to params | User-provided value |
| tainted_format_string.rb:22:20:22:34 | ...[...] | tainted_format_string.rb:22:20:22:25 | call to params : | tainted_format_string.rb:22:20:22:34 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:22:20:22:25 | call to params | User-provided value |
| tainted_format_string.rb:28:19:28:33 | ...[...] | tainted_format_string.rb:28:19:28:24 | call to params : | tainted_format_string.rb:28:19:28:33 | ...[...] | $@ flows here and is used in a format string. | tainted_format_string.rb:28:19:28:24 | call to params | User-provided value |
| tainted_format_string.rb:33:12:33:46 | ... + ... | tainted_format_string.rb:33:32:33:37 | call to params : | tainted_format_string.rb:33:12:33:46 | ... + ... | $@ flows here and is used in a format string. | tainted_format_string.rb:33:32:33:37 | call to params | User-provided value |
| tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | tainted_format_string.rb:36:30:36:35 | call to params : | tainted_format_string.rb:36:12:36:46 | "A log message: #{...}" | $@ flows here and is used in a format string. | tainted_format_string.rb:36:30:36:35 | call to params | User-provided value |

View File

@@ -10,6 +10,11 @@ class UsersController < ActionController::Base
printf(IO.new(1), params[:format], arg) # BAD
Kernel.printf(IO.new(1), params[:format], arg) # BAD
printf("%s", params[:format]) # GOOD
Kernel.printf("%s", params[:format]) # GOOD
fmt = "%s"
printf(fmt, params[:format]) # GOOD
printf(IO.new(1), params[:format]) # GOOD [FALSE POSITIVE]
Kernel.printf(IO.new(1), params[:format]) # GOOD [FALSE POSITIVE]