Merge pull request #6978 from github/nickrolfe/regex_injection

Ruby: add regex injection query
This commit is contained in:
Nick Rolfe
2021-11-23 16:22:35 +00:00
committed by GitHub
11 changed files with 306 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
lgtm,codescanning
* A new query (`rb/regexp-injection`) has been added. The query finds regular expressions constructed from user input, which could allow an attacker to perform a Regular Expression Denial of Service (ReDoS) attack.

View File

@@ -2,6 +2,7 @@ private import codeql.ruby.AST
private import codeql.ruby.Concepts
private import codeql.ruby.DataFlow
private import codeql.ruby.ApiGraphs
private import codeql.ruby.dataflow.FlowSummary
/**
* The `Kernel` module is included by the `Object` class, so its methods are available
@@ -333,3 +334,18 @@ class ModuleEvalCallCodeExecution extends CodeExecution::Range, DataFlow::CallNo
override DataFlow::Node getCode() { result = this.getArgument(0) }
}
/** Flow summary for `Regexp.escape` and its alias, `Regexp.quote`. */
class RegexpEscapeSummary extends SummarizedCallable {
RegexpEscapeSummary() { this = "Regexp.escape" }
override MethodCall getACall() {
result = API::getTopLevelMember("Regexp").getAMethodCall(["escape", "quote"]).asExpr().getExpr()
}
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[0]" and
output = "ReturnValue" and
preservesValue = false
}
}

View File

@@ -0,0 +1,85 @@
/**
* Provides default sources, sinks and sanitizers for reasoning about regexp
* injection vulnerabilities, as well as extension points for adding your own.
*/
private import ruby
private import codeql.ruby.DataFlow
private import codeql.ruby.Concepts
private import codeql.ruby.Frameworks
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.dataflow.BarrierGuards
private import codeql.ruby.ApiGraphs
/**
* Provides default sources, sinks and sanitizers for detecting
* regexp injection vulnerabilities, as well as extension points for
* adding your own.
*/
module RegExpInjection {
/**
* A data flow source for regexp injection vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for regexp injection vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer guard for regexp injection vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A data flow sanitized for regexp injection vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/** A regexp literal, considered as a flow sink. */
class RegExpLiteralAsSink extends Sink {
RegExpLiteralAsSink() { this.asExpr().getExpr() instanceof RegExpLiteral }
}
/**
* The first argument of a call to `Regexp.new` or `Regexp.compile`,
* considered as a flow sink.
*/
class ConstructedRegExpAsSink extends Sink {
ConstructedRegExpAsSink() {
exists(API::Node regexp, DataFlow::CallNode callNode |
regexp = API::getTopLevelMember("Regexp") and
(callNode = regexp.getAnInstantiation() or callNode = regexp.getAMethodCall("compile")) and
this = callNode.getArgument(0)
)
}
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
/**
* An inclusion check against an array of constant strings, considered as a
* sanitizer-guard.
*/
class StringConstArrayInclusionCallAsSanitizerGuard extends SanitizerGuard,
StringConstArrayInclusionCall { }
/**
* A call to `Regexp.escape` (or its alias, `Regexp.quote`), considered as a
* sanitizer.
*/
class RegexpEscapeSanitization extends Sanitizer {
RegexpEscapeSanitization() {
this = API::getTopLevelMember("Regexp").getAMethodCall(["escape", "quote"])
}
}
}

View File

@@ -0,0 +1,29 @@
/**
* Provides a taint-tracking configuration for detecting regexp injection vulnerabilities.
*
* Note, for performance reasons: only import this file if `Configuration` is needed,
* otherwise `RegExpInjectionCustomizations` should be imported instead.
*/
import codeql.ruby.DataFlow::DataFlow::PathGraph
import codeql.ruby.DataFlow
import codeql.ruby.TaintTracking
import RegExpInjectionCustomizations
import codeql.ruby.dataflow.BarrierGuards
/**
* A taint-tracking configuration for detecting regexp injection vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "RegExpInjection" }
override predicate isSource(DataFlow::Node source) { source instanceof RegExpInjection::Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof RegExpInjection::Sink }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof RegExpInjection::SanitizerGuard
}
override predicate isSanitizer(DataFlow::Node node) { node instanceof RegExpInjection::Sanitizer }
}

View File

@@ -0,0 +1,49 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Constructing a regular expression with unsanitized user input is dangerous,
since a malicious user may be able to modify the meaning of the expression. In
particular, such a user may be able to provide a regular expression fragment
that takes exponential time in the worst case, and use that to perform a Denial
of Service attack.
</p>
</overview>
<recommendation>
<p>
Before embedding user input into a regular expression, use a sanitization
function such as <code>Regexp.escape</code> to escape meta-characters that have
special meaning.
</p>
</recommendation>
<example>
<p>
The following examples construct regular expressions from an HTTP request
parameter without sanitizing it first:
</p>
<sample src="examples/regexp_injection_bad.rb" />
<p>
Instead, the request parameter should be sanitized first. This ensures that the
user cannot insert characters that have special meanings in regular expressions.
</p>
<sample src="examples/regexp_injection_good.rb" />
</example>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
</li>
<li>
Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.
</li>
<li>
Ruby: <a href="https://ruby-doc.org/core-3.0.2/Regexp.html#method-c-escape">Regexp.escape</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,26 @@
/**
* @name Regular expression injection
* @description User input should not be used in regular expressions without
* first being escaped. Otherwise, a malicious user may be able to
* inject an expression that could require exponential time on
* certain inputs.
* @kind path-problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id rb/regexp-injection
* @tags security
* external/cwe/cwe-1333
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
import ruby
import DataFlow::PathGraph
import codeql.ruby.DataFlow
import codeql.ruby.security.performance.RegExpInjectionQuery
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
where cfg.hasFlowPath(source, sink)
select sink.getNode(), source, sink, "This regular expression is constructed from a $@.",
source.getNode(), "user-provided value"

View File

@@ -0,0 +1,11 @@
class UsersController < ActionController::Base
def first_example
# BAD: Unsanitized user input is used to construct a regular expression
regex = /#{ params[:key] }/
end
def second_example
# BAD: Unsanitized user input is used to construct a regular expression
regex = Regexp.new(params[:key])
end
end

View File

@@ -0,0 +1,6 @@
class UsersController < ActionController::Base
def example
# GOOD: User input is sanitized before constructing the regular expression
regex = Regexp.new(Regex.escape(params[:key]))
end
end

View File

@@ -0,0 +1,24 @@
edges
| RegExpInjection.rb:4:12:4:17 | call to params : | RegExpInjection.rb:5:13:5:21 | /#{...}/ |
| RegExpInjection.rb:10:12:10:17 | call to params : | RegExpInjection.rb:11:13:11:27 | /foo#{...}bar/ |
| RegExpInjection.rb:16:12:16:17 | call to params : | RegExpInjection.rb:17:24:17:27 | name |
| RegExpInjection.rb:22:12:22:17 | call to params : | RegExpInjection.rb:23:24:23:33 | ... + ... |
| RegExpInjection.rb:54:12:54:17 | call to params : | RegExpInjection.rb:55:28:55:37 | ... + ... |
nodes
| RegExpInjection.rb:4:12:4:17 | call to params : | semmle.label | call to params : |
| RegExpInjection.rb:5:13:5:21 | /#{...}/ | semmle.label | /#{...}/ |
| RegExpInjection.rb:10:12:10:17 | call to params : | semmle.label | call to params : |
| RegExpInjection.rb:11:13:11:27 | /foo#{...}bar/ | semmle.label | /foo#{...}bar/ |
| RegExpInjection.rb:16:12:16:17 | call to params : | semmle.label | call to params : |
| RegExpInjection.rb:17:24:17:27 | name | semmle.label | name |
| RegExpInjection.rb:22:12:22:17 | call to params : | semmle.label | call to params : |
| RegExpInjection.rb:23:24:23:33 | ... + ... | semmle.label | ... + ... |
| RegExpInjection.rb:54:12:54:17 | call to params : | semmle.label | call to params : |
| RegExpInjection.rb:55:28:55:37 | ... + ... | semmle.label | ... + ... |
subpaths
#select
| RegExpInjection.rb:5:13:5:21 | /#{...}/ | RegExpInjection.rb:4:12:4:17 | call to params : | RegExpInjection.rb:5:13:5:21 | /#{...}/ | This regular expression is constructed from a $@. | RegExpInjection.rb:4:12:4:17 | call to params | user-provided value |
| RegExpInjection.rb:11:13:11:27 | /foo#{...}bar/ | RegExpInjection.rb:10:12:10:17 | call to params : | RegExpInjection.rb:11:13:11:27 | /foo#{...}bar/ | This regular expression is constructed from a $@. | RegExpInjection.rb:10:12:10:17 | call to params | user-provided value |
| RegExpInjection.rb:17:24:17:27 | name | RegExpInjection.rb:16:12:16:17 | call to params : | RegExpInjection.rb:17:24:17:27 | name | This regular expression is constructed from a $@. | RegExpInjection.rb:16:12:16:17 | call to params | user-provided value |
| RegExpInjection.rb:23:24:23:33 | ... + ... | RegExpInjection.rb:22:12:22:17 | call to params : | RegExpInjection.rb:23:24:23:33 | ... + ... | This regular expression is constructed from a $@. | RegExpInjection.rb:22:12:22:17 | call to params | user-provided value |
| RegExpInjection.rb:55:28:55:37 | ... + ... | RegExpInjection.rb:54:12:54:17 | call to params : | RegExpInjection.rb:55:28:55:37 | ... + ... | This regular expression is constructed from a $@. | RegExpInjection.rb:54:12:54:17 | call to params | user-provided value |

View File

@@ -0,0 +1 @@
queries/security/cwe-1333/RegExpInjection.ql

View File

@@ -0,0 +1,57 @@
class FooController < ActionController::Base
# BAD
def route0
name = params[:name]
regex = /#{name}/
end
# BAD
def route1
name = params[:name]
regex = /foo#{name}bar/
end
# BAD
def route2
name = params[:name]
regex = Regexp.new(name)
end
# BAD
def route3
name = params[:name]
regex = Regexp.new("@" + name)
end
# GOOD - string is compared against a constant string
def route4
name = params[:name]
regex = Regexp.new("@" + name) if name == "foo"
end
# GOOD - string is compared against a constant string array
def route5
name = params[:name]
if ["John", "Paul", "George", "Ringo"].include?(name)
regex = /@#{name}/
end
end
# GOOD - string is explicitly escaped
def route6
name = params[:name]
regex = Regexp.new(Regexp.escape(name))
end
# GOOD - string is explicitly escaped
def route7
name = params[:name]
regex = Regexp.new(Regexp.quote(name))
end
# BAD
def route8
name = params[:name]
regex = Regexp.compile("@" + name)
end
end