Add Unicode Bypass Validation query, test and help file

This commit is contained in:
Sim4n6
2023-05-02 15:36:39 +01:00
parent 2e5a04854e
commit 019b85beb6
10 changed files with 242 additions and 0 deletions

View File

@@ -0,0 +1,29 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "Unicode transformation"
* vulnerabilities, as well as extension points for adding your own.
*/
private import ruby
/**
* Provides default sources, sinks and sanitizers for detecting
* "Unicode transformation"
* vulnerabilities, as well as extension points for adding your own.
*/
module UnicodeBypassValidation {
/**
* A data flow source for "Unicode transformation" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "Unicode transformation" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer for "Unicode transformation" vulnerabilities.
*/
abstract class Sanitizer extends DataFlow::Node { }
}

View File

@@ -0,0 +1,61 @@
/**
* Provides a taint-tracking configuration for detecting "Unicode transformation mishandling" vulnerabilities.
*/
private import ruby
private import codeql.ruby.ApiGraphs
private import codeql.ruby.AST
private import codeql.ruby.Concepts
private import codeql.ruby.DataFlow
private import codeql.ruby.dataflow.RemoteFlowSources
private import codeql.ruby.TaintTracking
import UnicodeBypassValidationCustomizations::UnicodeBypassValidation
/** A state signifying that a logical validation has not been performed. */
class PreValidation extends DataFlow::FlowState {
PreValidation() { this = "PreValidation" }
}
/** A state signifying that a logical validation has been performed. */
class PostValidation extends DataFlow::FlowState {
PostValidation() { this = "PostValidation" }
}
/**
* A taint-tracking configuration for detecting "Unicode transformation mishandling" vulnerabilities.
*
* This configuration uses two flow states, `PreValidation` and `PostValidation`,
* to track the requirement that a logical validation has been performed before the Unicode Transformation.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UnicodeBypassValidation" }
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
source instanceof RemoteFlowSource and state instanceof PreValidation
}
override predicate isAdditionalTaintStep(
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
DataFlow::FlowState stateTo
) {
(
exists(Escaping escaping | nodeFrom = escaping.getAnInput() and nodeTo = escaping.getOutput())
or
exists(RegexExecution re | nodeFrom = re.getString() and nodeTo = re)
// or
// stringManipulation(nodeFrom, nodeTo)
) and
stateFrom instanceof PreValidation and
stateTo instanceof PostValidation
}
/* A Unicode Tranformation (Unicode tranformation) is considered a sink when the algorithm used is either NFC or NFKC. */
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
exists(DataFlow::CallNode cn |
cn.getMethodName() = "unicode_normalize" and
cn.getArgument(0).toString() = [":nfkc", ":nfc"] and
sink = cn.getReceiver()
) and
state instanceof PostValidation
}
}

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `ruby/post-unicode-normalization`, to detect a misuse of a post-unicode normalization.

View File

@@ -0,0 +1,36 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>Security checks bypass due to a Unicode transformation</p>
<p>
If ever a unicode tranformation is performed after some security checks or logical
validation, the
latter could be bypassed due to a potential Unicode characters collision.
The validation of concern are any character escaping, any regex validation or any string
verification.
</p>
<img src="./vulnerability-flow.png" alt="Security checks bypassed" />
</overview>
<recommendation>
<p> Perform a Unicode normalization before the logical validation. </p>
</recommendation>
<example>
<p> The following example showcases the bypass of all checks performed by <code>
flask.escape()</code> due to a post-unicode normalization.</p>
<p>For instance: the character U+FE64 (<code>﹤</code>) is not filtered-out by the flask
escape function. But due to the Unicode normalization, the character is transformed and
would become U+003C (<code> &lt; </code> ).</p>
<sample src="./examples/unicode_normalization.rb" />
</example>
<references>
<li> Research study: <a
href="https://gosecure.github.io/presentations/2021-02-unicode-owasp-toronto/philippe_arteau_owasp_unicode_v4.pdf">
Unicode vulnerabilities that could bYte you
</a> and <a
href="https://gosecure.github.io/unicode-pentester-cheatsheet/">Unicode pentest
cheatsheet</a>. </li>
</references>
</qhelp>

View File

@@ -0,0 +1,22 @@
/**
* @name Bypass Logical Validation Using Unicode Characters
* @description A Unicode transformation is using a remote user-controlled data. The transformation is a Unicode normalization using the algorithms "NFC" or "NFKC". In all cases, the security measures implemented or the logical validation performed to escape any injection characters, to validate using regex patterns or to perform string-based checks, before the Unicode transformation are **bypassable** by special Unicode characters.
* @kind path-problem
* @id rb/unicode-bypass-validation
* @precision high
* @problem.severity error
* @tags security
* experimental
* external/cwe/cwe-176
*/
import ruby
import codeql.ruby.experimental.UnicodeBypassValidationQuery
import DataFlow::PathGraph
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters.",
sink.getNode(), "Unicode transformation (Unicode normalization)", source.getNode(),
"remote user-controlled data"

View File

@@ -0,0 +1,8 @@
class UnicodeNormalizationHtMLSafeController < ActionController::Base
def unicodeNormalize
unicode_input = params[:unicode_input]
unicode_html_safe = unicode_input.html_safe
normalized_nfkc = unicode_html_safe.unicode_normalize(:nfkc) # $result=BAD
normalized_nfc = unicode_html_safe.unicode_normalize(:nfc) # $result=BAD
end
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

View File

@@ -0,0 +1,56 @@
edges
| unicode_normalization.rb:3:5:3:17 | unicode_input : | unicode_normalization.rb:4:23:4:35 | unicode_input |
| unicode_normalization.rb:3:5:3:17 | unicode_input : | unicode_normalization.rb:5:22:5:34 | unicode_input |
| unicode_normalization.rb:3:21:3:26 | call to params : | unicode_normalization.rb:3:21:3:42 | ...[...] : |
| unicode_normalization.rb:3:21:3:42 | ...[...] : | unicode_normalization.rb:3:5:3:17 | unicode_input : |
| unicode_normalization.rb:11:5:11:17 | unicode_input : | unicode_normalization.rb:12:27:12:39 | unicode_input : |
| unicode_normalization.rb:11:5:11:17 | unicode_input : | unicode_normalization.rb:12:27:12:39 | unicode_input : |
| unicode_normalization.rb:11:21:11:26 | call to params : | unicode_normalization.rb:11:21:11:42 | ...[...] : |
| unicode_normalization.rb:11:21:11:26 | call to params : | unicode_normalization.rb:11:21:11:42 | ...[...] : |
| unicode_normalization.rb:11:21:11:42 | ...[...] : | unicode_normalization.rb:11:5:11:17 | unicode_input : |
| unicode_normalization.rb:11:21:11:42 | ...[...] : | unicode_normalization.rb:11:5:11:17 | unicode_input : |
| unicode_normalization.rb:12:5:12:23 | unicode_input_manip : | unicode_normalization.rb:13:23:13:41 | unicode_input_manip |
| unicode_normalization.rb:12:5:12:23 | unicode_input_manip : | unicode_normalization.rb:14:22:14:40 | unicode_input_manip |
| unicode_normalization.rb:12:27:12:39 | unicode_input : | unicode_normalization.rb:12:27:12:59 | call to sub : |
| unicode_normalization.rb:12:27:12:39 | unicode_input : | unicode_normalization.rb:12:27:12:59 | call to sub : |
| unicode_normalization.rb:12:27:12:59 | call to sub : | unicode_normalization.rb:12:5:12:23 | unicode_input_manip : |
| unicode_normalization.rb:20:5:20:17 | unicode_input : | unicode_normalization.rb:21:25:21:37 | unicode_input : |
| unicode_normalization.rb:20:21:20:26 | call to params : | unicode_normalization.rb:20:21:20:42 | ...[...] : |
| unicode_normalization.rb:20:21:20:42 | ...[...] : | unicode_normalization.rb:20:5:20:17 | unicode_input : |
| unicode_normalization.rb:21:5:21:21 | unicode_html_safe : | unicode_normalization.rb:22:23:22:39 | unicode_html_safe |
| unicode_normalization.rb:21:5:21:21 | unicode_html_safe : | unicode_normalization.rb:23:22:23:38 | unicode_html_safe |
| unicode_normalization.rb:21:25:21:37 | unicode_input : | unicode_normalization.rb:21:25:21:47 | call to html_safe : |
| unicode_normalization.rb:21:25:21:47 | call to html_safe : | unicode_normalization.rb:21:5:21:21 | unicode_html_safe : |
nodes
| unicode_normalization.rb:3:5:3:17 | unicode_input : | semmle.label | unicode_input : |
| unicode_normalization.rb:3:21:3:26 | call to params : | semmle.label | call to params : |
| unicode_normalization.rb:3:21:3:42 | ...[...] : | semmle.label | ...[...] : |
| unicode_normalization.rb:4:23:4:35 | unicode_input | semmle.label | unicode_input |
| unicode_normalization.rb:5:22:5:34 | unicode_input | semmle.label | unicode_input |
| unicode_normalization.rb:11:5:11:17 | unicode_input : | semmle.label | unicode_input : |
| unicode_normalization.rb:11:5:11:17 | unicode_input : | semmle.label | unicode_input : |
| unicode_normalization.rb:11:21:11:26 | call to params : | semmle.label | call to params : |
| unicode_normalization.rb:11:21:11:42 | ...[...] : | semmle.label | ...[...] : |
| unicode_normalization.rb:11:21:11:42 | ...[...] : | semmle.label | ...[...] : |
| unicode_normalization.rb:12:5:12:23 | unicode_input_manip : | semmle.label | unicode_input_manip : |
| unicode_normalization.rb:12:27:12:39 | unicode_input : | semmle.label | unicode_input : |
| unicode_normalization.rb:12:27:12:39 | unicode_input : | semmle.label | unicode_input : |
| unicode_normalization.rb:12:27:12:59 | call to sub : | semmle.label | call to sub : |
| unicode_normalization.rb:13:23:13:41 | unicode_input_manip | semmle.label | unicode_input_manip |
| unicode_normalization.rb:14:22:14:40 | unicode_input_manip | semmle.label | unicode_input_manip |
| unicode_normalization.rb:20:5:20:17 | unicode_input : | semmle.label | unicode_input : |
| unicode_normalization.rb:20:21:20:26 | call to params : | semmle.label | call to params : |
| unicode_normalization.rb:20:21:20:42 | ...[...] : | semmle.label | ...[...] : |
| unicode_normalization.rb:21:5:21:21 | unicode_html_safe : | semmle.label | unicode_html_safe : |
| unicode_normalization.rb:21:25:21:37 | unicode_input : | semmle.label | unicode_input : |
| unicode_normalization.rb:21:25:21:47 | call to html_safe : | semmle.label | call to html_safe : |
| unicode_normalization.rb:22:23:22:39 | unicode_html_safe | semmle.label | unicode_html_safe |
| unicode_normalization.rb:23:22:23:38 | unicode_html_safe | semmle.label | unicode_html_safe |
subpaths
#select
| unicode_normalization.rb:4:23:4:35 | unicode_input | unicode_normalization.rb:3:21:3:26 | call to params : | unicode_normalization.rb:4:23:4:35 | unicode_input | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:4:23:4:35 | unicode_input | Unicode transformation (Unicode normalization) | unicode_normalization.rb:3:21:3:26 | call to params | remote user-controlled data |
| unicode_normalization.rb:5:22:5:34 | unicode_input | unicode_normalization.rb:3:21:3:26 | call to params : | unicode_normalization.rb:5:22:5:34 | unicode_input | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:5:22:5:34 | unicode_input | Unicode transformation (Unicode normalization) | unicode_normalization.rb:3:21:3:26 | call to params | remote user-controlled data |
| unicode_normalization.rb:13:23:13:41 | unicode_input_manip | unicode_normalization.rb:11:21:11:26 | call to params : | unicode_normalization.rb:13:23:13:41 | unicode_input_manip | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:13:23:13:41 | unicode_input_manip | Unicode transformation (Unicode normalization) | unicode_normalization.rb:11:21:11:26 | call to params | remote user-controlled data |
| unicode_normalization.rb:14:22:14:40 | unicode_input_manip | unicode_normalization.rb:11:21:11:26 | call to params : | unicode_normalization.rb:14:22:14:40 | unicode_input_manip | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:14:22:14:40 | unicode_input_manip | Unicode transformation (Unicode normalization) | unicode_normalization.rb:11:21:11:26 | call to params | remote user-controlled data |
| unicode_normalization.rb:22:23:22:39 | unicode_html_safe | unicode_normalization.rb:20:21:20:26 | call to params : | unicode_normalization.rb:22:23:22:39 | unicode_html_safe | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:22:23:22:39 | unicode_html_safe | Unicode transformation (Unicode normalization) | unicode_normalization.rb:20:21:20:26 | call to params | remote user-controlled data |
| unicode_normalization.rb:23:22:23:38 | unicode_html_safe | unicode_normalization.rb:20:21:20:26 | call to params : | unicode_normalization.rb:23:22:23:38 | unicode_html_safe | This $@ processes unsafely $@ and any logical validation in-between could be bypassed using special Unicode characters. | unicode_normalization.rb:23:22:23:38 | unicode_html_safe | Unicode transformation (Unicode normalization) | unicode_normalization.rb:20:21:20:26 | call to params | remote user-controlled data |

View File

@@ -0,0 +1 @@
experimental/cwe-176/UnicodeBypassValidation.ql

View File

@@ -0,0 +1,25 @@
class UnicodeNormalizationOKController < ActionController::Base
def unicodeNormalize
unicode_input = params[:unicode_input]
normalized_nfkc = unicode_input.unicode_normalize(:nfkc) # $MISSING:result=OK
normalized_nfc = unicode_input.unicode_normalize(:nfc) # $MISSING:result=OK
end
end
class UnicodeNormalizationStrManipulationController < ActionController::Base
def unicodeNormalize
unicode_input = params[:unicode_input]
unicode_input_manip = unicode_input.sub(/[aeiou]/, "*")
normalized_nfkc = unicode_input_manip.unicode_normalize(:nfkc) # $result=BAD
normalized_nfc = unicode_input_manip.unicode_normalize(:nfc) # $result=BAD
end
end
class UnicodeNormalizationHtMLSafeController < ActionController::Base
def unicodeNormalize
unicode_input = params[:unicode_input]
unicode_html_safe = unicode_input.html_safe
normalized_nfkc = unicode_html_safe.unicode_normalize(:nfkc) # $result=BAD
normalized_nfc = unicode_html_safe.unicode_normalize(:nfc) # $result=BAD
end
end