Ruby: IncompleteMultiCharacterSanitization Query

This query is similar to IncompleteSanitization but for multi-character
sequences.
This commit is contained in:
Harry Maclean
2022-02-14 11:14:08 +13:00
parent 6e289a9db3
commit c234bd94d1
6 changed files with 419 additions and 0 deletions

View File

@@ -0,0 +1,209 @@
/**
* Provides predicates for reasoning about improper multi-character sanitization.
*/
private import ruby
private import codeql.ruby.regexp.RegExpTreeView as RETV
private import codeql.ruby.security.performance.ReDoSUtil as ReDoSUtil
private import codeql.ruby.DataFlow
private import codeql.ruby.frameworks.core.String
private import codeql.ruby.dataflow.internal.DataFlowDispatch
/**
* A regexp term that matches substrings that should be replaced with the empty string.
*/
class EmptyReplaceRegExpTerm extends RETV::RegExpTerm {
private StringSubstitutionCall call;
EmptyReplaceRegExpTerm() {
call.getReplacementString() = "" and
this = call.getPatternRegExp().getRegExpTerm().getAChild*()
}
/**
* Get the substitution call that uses this regexp term.
*/
StringSubstitutionCall getCall() { result = call }
}
/**
* A prefix that may be dangerous to sanitize explicitly.
*
* Note that this class exists solely as a (necessary) optimization for this query.
*/
private class DangerousPrefix extends string {
DangerousPrefix() {
this = ["/..", "../"] or
this = "<!--" or
this = "<" + ["iframe", "script", "cript", "scrip", "style"]
}
}
/**
* A substring of a prefix that may be dangerous to sanitize explicitly.
*/
private class DangerousPrefixSubstring extends string {
DangerousPrefixSubstring() {
exists(DangerousPrefix s | this = s.substring([0 .. s.length()], [0 .. s.length()]))
}
}
/**
* Gets a dangerous prefix that is in the prefix language of `t`.
*/
private DangerousPrefix getADangerousMatchedPrefix(EmptyReplaceRegExpTerm t) {
result = getADangerousMatchedPrefixSubstring(t) and
not exists(EmptyReplaceRegExpTerm pred |
pred = t.getPredecessor+() and not pred.matchesEmptyString()
)
}
pragma[noinline]
private DangerousPrefixSubstring getADangerousMatchedChar(EmptyReplaceRegExpTerm t) {
t.matchesEmptyString() and result = ""
or
result = t.getAMatch()
or
// A substring matched by some character class. This is only used to match the "word" part of a HTML tag (e.g. "iframe" in "<iframe").
exists(ReDoSUtil::CharacterClass cc |
cc = ReDoSUtil::getCanonicalCharClass(t) and
cc.matches(result) and
result.regexpMatch("\\w") and
// excluding character classes that match ">" (e.g. /<[^<]*>/), as these might consume nested HTML tags, and thus prevent the dangerous pattern this query is looking for.
not cc.matches(">")
)
or
t instanceof RETV::RegExpDot and
result.length() = 1
or
(
t instanceof RETV::RegExpOpt or
t instanceof RETV::RegExpStar or
t instanceof RETV::RegExpPlus or
t instanceof RETV::RegExpGroup or
t instanceof RETV::RegExpAlt
) and
result = getADangerousMatchedChar(t.getAChild())
}
/**
* Gets a substring of a dangerous prefix that is in the language starting at `t` (ignoring lookarounds).
*
* Note that the language of `t` is slightly restricted as not all RegExpTerm types are supported.
*/
private DangerousPrefixSubstring getADangerousMatchedPrefixSubstring(EmptyReplaceRegExpTerm t) {
result = getADangerousMatchedChar(t) + getADangerousMatchedPrefixSubstring(t.getSuccessor())
or
result = getADangerousMatchedChar(t)
or
// loop around for repetitions (only considering alphanumeric characters in the repetition)
exists(RepetitionMatcher repetition | t = repetition |
result = getADangerousMatchedPrefixSubstring(repetition) + repetition.getAChar()
)
}
private class RepetitionMatcher extends EmptyReplaceRegExpTerm {
string char;
pragma[noinline]
RepetitionMatcher() {
(this instanceof RETV::RegExpPlus or this instanceof RETV::RegExpStar) and
char = getADangerousMatchedChar(this.getAChild()) and
char.regexpMatch("\\w")
}
pragma[noinline]
string getAChar() { result = char }
}
/**
* Holds if `t` may match the dangerous `prefix` and some suffix, indicating intent to prevent a vulnerability of kind `kind`.
*/
private predicate matchesDangerousPrefix(EmptyReplaceRegExpTerm t, string prefix, string kind) {
prefix = getADangerousMatchedPrefix(t) and
(
kind = "path injection" and
prefix = ["/..", "../"] and
// If the regex is matching explicit path components, it is unlikely that it's being used as a sanitizer.
not t.getSuccessor*().getAMatch().regexpMatch("(?is).*[a-z0-9_-].*")
or
kind = "HTML element injection" and
(
// comments
prefix = "<!--" and
// If the regex is matching explicit textual content of an HTML comment, it is unlikely that it's being used as a sanitizer.
not t.getSuccessor*().getAMatch().regexpMatch("(?is).*[a-z0-9_].*")
or
// specific tags
// the `cript|scrip` case has been observed in the wild several times
prefix = "<" + ["iframe", "script", "cript", "scrip", "style"]
)
)
or
kind = "HTML attribute injection" and
prefix =
[
// ordinary event handler prefix
"on",
// angular prefixes
"ng-", "ng:", "data-ng-", "x-ng-"
] and
(
// explicit matching: `onclick` and `ng-bind`
t.getAMatch().regexpMatch("(?i)" + prefix + "[a-z]+")
or
// regexp-based matching: `on[a-z]+`
exists(EmptyReplaceRegExpTerm start | start = t.getAChild() |
start.getAMatch().regexpMatch("(?i)[^a-z]*" + prefix) and
isCommonWordMatcher(start.getSuccessor())
)
)
}
/**
* Holds if `t` is a common pattern for matching words
*/
private predicate isCommonWordMatcher(RETV::RegExpTerm t) {
exists(RETV::RegExpTerm quantified | quantified = t.(RETV::RegExpQuantifier).getChild(0) |
// [a-z]+ and similar
quantified
.(RETV::RegExpCharacterClass)
.getAChild()
.(RETV::RegExpCharacterRange)
.isRange(["a", "A"], ["z", "Z"])
or
// \w+ or [\w]+
[quantified, quantified.(RETV::RegExpCharacterClass).getAChild()]
.(RETV::RegExpCharacterClassEscape)
.getValue() = "w"
)
}
/**
* Holds if `replace` has a pattern argument containing a regular expression
* `dangerous` which matches a dangerous string beginning with `prefix`, in
* attempt to avoid a vulnerability of kind `kind`.
*/
predicate hasResult(
StringSubstitutionCall replace, EmptyReplaceRegExpTerm dangerous, string prefix, string kind
) {
exists(EmptyReplaceRegExpTerm regexp |
replace = regexp.getCall() and
dangerous.getRootTerm() = regexp and
// skip leading optional elements
not dangerous.matchesEmptyString() and
// only warn about the longest match
prefix = max(string m | matchesDangerousPrefix(dangerous, m, kind) | m order by m.length(), m) and
// only warn once per kind
not exists(EmptyReplaceRegExpTerm other |
other = dangerous.getAChild+() or other = dangerous.getPredecessor+()
|
matchesDangerousPrefix(other, _, kind) and
not other.matchesEmptyString()
) and
not exists(RETV::RegExpCaret c | regexp = c.getRootTerm()) and
not exists(RETV::RegExpDollar d | regexp = d.getRootTerm()) and
// Don't flag replace operations that are called repeatedly in a loop, as they can actually work correctly.
not replace.flowsTo(replace.getReceiver+())
)
}

View File

@@ -0,0 +1,8 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<include src="IncompleteSanitization.qhelp" />
</qhelp>

View File

@@ -0,0 +1,24 @@
/**
* @name Incomplete multi-character sanitization
* @description A sanitizer that removes a sequence of characters may reintroduce the dangerous sequence.
* @kind problem
* @problem.severity warning
* @security-severity 7.8
* @precision high
* @id rb/incomplete-multi-character-sanitization
* @tags correctness
* security
* external/cwe/cwe-020
* external/cwe/cwe-080
* external/cwe/cwe-116
*/
import ruby
import codeql.ruby.frameworks.core.String
import codeql.ruby.DataFlow
import codeql.ruby.security.IncompleteMultiCharacterSanitizationQuery
from StringSubstitutionCall replace, EmptyReplaceRegExpTerm dangerous, string prefix, string kind
where hasResult(replace, dangerous, prefix, kind)
select replace, "This string may still contain $@, which may cause a " + kind + " vulnerability.",
dangerous, prefix

View File

@@ -0,0 +1,41 @@
/**
* @kind problem
*/
import ruby
import codeql.ruby.security.IncompleteMultiCharacterSanitizationQuery as Query
import codeql.ruby.frameworks.core.String
import TestUtilities.InlineExpectationsTest
class Test extends InlineExpectationsTest {
Test() { this = "IncompleteMultiCharacterSanitizationTest" }
override string getARelevantTag() { result = "hasResult" }
override predicate hasActualResult(Location location, string element, string tag, string value) {
tag = "hasResult" and
hasResult(location, element, value)
}
}
predicate hasResult(Location location, string element, string value) {
exists(
StringSubstitutionCall replace, Query::EmptyReplaceRegExpTerm dangerous, string prefix,
string kind
|
replace.getLocation() = location and
element = replace.toString() and
value = shortKind(kind)
|
Query::hasResult(replace, dangerous, prefix, kind)
)
}
bindingset[kind]
string shortKind(string kind) {
kind = "HTML element injection" and result = "html"
or
kind = "path injection" and result = "path"
or
kind = "HTML attribute injection" and result = "attr"
}

View File

@@ -0,0 +1,137 @@
# CVE-2019-10756
def m1(content)
content = content.gsub(/<.*cript.*\/scrip.*>/i, "") # $ hasResult=html
content = content.gsub(/ on\w+=".*"/, "") # $ hasResult=attr
content = content.gsub(/ on\w+=\'.*\'/, "") # $ hasResult=attr
content
end
def m2(content)
content = content.gsub(/<.*cript.*/i, "") # $ hasResult=html
content = content.gsub(/.on\w+=.*".*"/, "") # $ hasResult=attr
content = content.gsub(/.on\w+=.*\'.*\'/, "") # $ hasResult=attr
content
end
# CVE-2020-7656
def m3(text)
rscript = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/i
text.gsub(rscript, "") # $ hasResult=html
text
end
# CVE-2019-1010091
def m4(text)
text.gsub(/<!--|--!?>/, "") # $ hasResult=html
end
def m5(text)
while /<!--|--!?>/.match?(text)
text = text.gsub(/<!--|--!?>/, "") # OK
end
text
end
# CVE-2019-10767
def m6(id)
id.gsub(/\.\./, "") # OK (can not contain '..' afterwards)
end
def m7(id)
id.gsub(/[\]\[*,'"`<>\\?\/]/, "") # OK (or is it?)
end
# CVE-2019-8903
REG_TRAVEL = /(\/)?\.\.\//
def m8(req)
req.url = req.url.gsub(REG_TRAVEL, "") # $ hasResult=path
end
# New cases
def m9(x)
x = x.gsub(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/, "") # $ hasResult=html
x = x.gsub(/(\/|\s)on\w+=(\'|")?[^"]*(\'|")?/, "") # $ hasResult=attr
x = x.gsub(/<\/script>/, "") # OK
x = x.gsub(/<(.)?br(.)?>/, "") # OK
x = x.gsub(/<\/?b>/, "") # OK
x = x.gsub(/<(ul|ol)><\/(ul|ol)>/i, "") # OK
x = x.gsub(/<li><\/li>/i, "") # OK
x = x.gsub(/<!--(.*?)-->/m, "") # $ hasResult=html
x = x.gsub(/\sng-[a-z-]+/, "") # $ hasResult=attr
x = x.gsub(/\sng-[a-z-]+/, "") # $ hasResult=attr
x = x.gsub(/(<!--\[CDATA\[|\]\]-->)/, "\n") # OK: not a sanitizer
x = x.gsub(/<script.+desktop\-only.+<\/script>/, "") # $ SPURIOUS: hasResult=html SPURIOUS: hasResult=attr
x = x.gsub(/<script async.+?<\/script>/, "") # OK
x = x.gsub(/<!--[\s\S]*?-->|<\?(?:php)?[\s\S]*?\?>/i, "") # $ hasResult=html
x = x.gsub(/\x2E\x2E\x2F\x2E\x2E\x2F/, "") # NOT OK (matches "../../") $ hasResult=path
x = x.gsub(/<script.*>.*<\/script>/i, "") # $ hasResult=html
x = x.gsub(/^(\.\.\/?)+/, "") # OK
# NOT OK
x = x.gsub(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/) do |match| # $ hasResult=html
if unknown then match else "" end
end
x = x.gsub(/<\/?([a-z][a-z0-9]*)\b[^>]*>/i, "") # NOT OK [INCONSISTENCY] $ hasResult=html
x = x.gsub(/\.\./, "") # OK
x = x.gsub(/\.\.\//, "") # $ hasResult=path
x = x.gsub(/\/\.\./, "") # $ hasResult=path
x = x.gsub(/<script(.*?)>([\s\S]*?)<\/script>/i, "") # $ hasResult=html
x = x.gsub(/<(script|del)(?=[\s>])[\w\W]*?<\/\1\s*>/i, "") # $ hasResult=html
x = x.gsub(/\<script[\s\S]*?\>[\s\S]*?\<\/script\>/, "") # $ hasResult=html
x = x.gsub(/<(script|style|title)[^<]+<\/(script|style|title)>/m, "") # $ hasResult=html
x = x.gsub(/<script[^>]*>([\s\S]*?)<\/script>/i, "") # $ hasResult=html
x = x.gsub(/<script[\s\S]*?<\/script>/i, "") # $ hasResult=html
x = x.gsub(/ ?<!-- ?/, "") # $ hasResult=html
x = x.gsub(/require\('\.\.\/common'\)/, "") # OK
x = x.gsub(/\.\.\/\.\.\/lib\//, "") # OK
# TODO: make Rubyish
while x.include? "."
x = x
.gsub(/^\.\//, "")
.gsub(/\/\.\//, "/")
.gsub(/[^\/]*\/\.\.\//, "") # OK
end
x = x.gsub(/([^.\s]+\.)+/, "") # OK
x = x.gsub(/<!\-\-DEVEL[\d\D]*?DEVEL\-\->/, "") # OK
x = x # $ hasResult=path
.gsub(/^\.\//, "")
.gsub(/\/\.\//, "/")
.gsub(/[^\/]*\/\.\.\//, "")
x
end
def m10(content)
content.gsub(/<script.*\/script>/i, "") # $ hasResult=html
content.gsub(/<(script).*\/script>/i, "") # $ hasResult=html
content.gsub(/.+<(script).*\/script>/i, "") # $ hasResult=html
content.gsub(/.*<(script).*\/script>/i, "") # $ hasResult=html
end
def m11(content)
content = content.gsub(/<script[\s\S]*?<\/script>/i, "") # $ hasResult=html
content = content.gsub(/<[a-zA-Z\/](.|\n)*?>/, '') || ' ' # $ hasResult=html
content = content.gsub(/<(script|iframe|video)[\s\S]*?<\/(script|iframe|video)>/, '') # $ hasResult=html
content = content.gsub(/<(script|iframe|video)(.|\s)*?\/(script|iframe|video)>/, '') # $ hasResult=html
content = content.gsub(/<[^<]*>/, "") # OK
end