mirror of
https://github.com/github/codeql.git
synced 2025-12-24 12:46:34 +01:00
Merge pull request #8354 from aibaars/incomplete-url-string-sanitization
Incomplete url string sanitization
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
|
||||
Sanitizing untrusted URLs is an important technique for
|
||||
preventing attacks such as request forgeries and malicious
|
||||
redirections. Usually, this is done by checking that the host of a URL
|
||||
is in a set of allowed hosts.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
However, treating the URL as a string and checking if one of the
|
||||
allowed hosts is a substring of the URL is very prone to errors.
|
||||
Malicious URLs can bypass such security checks by embedding one
|
||||
of the allowed hosts in an unexpected location.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
Even if the substring check is not used in a
|
||||
security-critical context, the incomplete check may still cause
|
||||
undesirable behaviors when the check succeeds accidentally.
|
||||
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
|
||||
Parse a URL before performing a check on its host value,
|
||||
and ensure that the check handles arbitrary subdomain sequences
|
||||
correctly.
|
||||
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
|
||||
<p>
|
||||
|
||||
The following example code checks that a URL redirection
|
||||
will reach the <code>example.com</code> domain, or one of its
|
||||
subdomains, and not some malicious site.
|
||||
|
||||
</p>
|
||||
|
||||
<sample src="examples/IncompleteUrlSubstringSanitization_BAD1.rb"/>
|
||||
|
||||
<p>
|
||||
|
||||
The substring check is, however, easy to bypass, for example
|
||||
by embedding <code>example.com</code> in the path component:
|
||||
<code>http://evil-example.net/example.com</code>, or in the query
|
||||
string component: <code>http://evil-example.net/?x=example.com</code>.
|
||||
|
||||
Address these shortcomings by checking the host of the parsed URL instead:
|
||||
|
||||
</p>
|
||||
|
||||
<sample src="examples/IncompleteUrlSubstringSanitization_BAD2.rb"/>
|
||||
|
||||
<p>
|
||||
|
||||
This is still not a sufficient check as the
|
||||
following URLs bypass it: <code>http://evil-example.com</code> and
|
||||
<code>http://example.com.evil-example.net</code>.
|
||||
|
||||
Instead, use an explicit whitelist of allowed hosts to
|
||||
make the redirect secure:
|
||||
|
||||
</p>
|
||||
|
||||
<sample src="examples/IncompleteUrlSubstringSanitization_GOOD.rb"/>
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">Unvalidated Redirects and Forwards Cheat Sheet</a></li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,14 @@
|
||||
/**
|
||||
* @name Incomplete URL substring sanitization
|
||||
* @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.8
|
||||
* @precision high
|
||||
* @id rb/incomplete-url-substring-sanitization
|
||||
* @tags correctness
|
||||
* security
|
||||
* external/cwe/cwe-020
|
||||
*/
|
||||
|
||||
import IncompleteUrlSubstringSanitization
|
||||
@@ -0,0 +1,62 @@
|
||||
/**
|
||||
* Incomplete URL substring sanitization
|
||||
*/
|
||||
|
||||
private import IncompleteUrlSubstringSanitizationSpecific
|
||||
|
||||
/**
 * A data-flow node that tests whether a string contains a given substring,
 * either anywhere in the string (`StringOps::Includes`) or anchored at its
 * start (`StringOps::StartsWith`) or end (`StringOps::EndsWith`).
 */
class SomeSubstringCheck extends DataFlow::Node {
  DataFlow::Node substring;

  SomeSubstringCheck() {
    substring = this.(StringOps::StartsWith).getSubstring()
    or
    substring = this.(StringOps::Includes).getSubstring()
    or
    substring = this.(StringOps::EndsWith).getSubstring()
  }

  /** Gets the node for the substring that this check looks for. */
  DataFlow::Node getSubstring() { result = substring }
}
|
||||
|
||||
/**
 * Holds if `check` is an incomplete URL substring sanitization.
 *
 * `substring` is the substring operand of `check`, `target` is a string constant
 * that `substring` may evaluate to, and `msg` describes how the check can be
 * bypassed (with `$@` as the placeholder for `substring`/`target`).
 */
query predicate problems(
  SomeSubstringCheck check, string msg, DataFlow::Node substring, string target
) {
  mayHaveStringValue(substring, target) and
  substring = check.getSubstring() and
  (
    // `target` is a domain on a common TLD, and perhaps some other URL components
    target
        .regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::getACommonTld() +
            "(:[0-9]+)?/?")
    or
    // `target` is an HTTP URL to a domain on any TLD
    target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
    or
    // `target` is an HTTP URL to a domain on any TLD with path elements;
    // this form is only considered for `Includes` checks
    target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/[a-z0-9/_-]+") and
    check instanceof StringOps::Includes
  ) and
  // exclude checks that are considered safe
  not (
    // a suffix check on a subdomain sequence with a leading dot is safe
    // (provided it is performed on the host of the URL)
    check instanceof StringOps::EndsWith and
    target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
    or
    // a prefix check that ends in a port or a slash is safe
    check instanceof StringOps::StartsWith and
    target.regexpMatch(".*(:[0-9]+|/)")
  ) and
  // pick the message by kind of check: prefix first, then suffix, otherwise the generic one
  (
    check instanceof StringOps::StartsWith and
    msg = "'$@' may be followed by an arbitrary host name."
    or
    not check instanceof StringOps::StartsWith and
    check instanceof StringOps::EndsWith and
    msg = "'$@' may be preceded by an arbitrary host name."
    or
    not check instanceof StringOps::StartsWith and
    not check instanceof StringOps::EndsWith and
    msg = "'$@' can be anywhere in the URL, and arbitrary hosts may come before or after it."
  )
}
|
||||
@@ -0,0 +1,8 @@
|
||||
import codeql.ruby.DataFlow
|
||||
import codeql.ruby.StringOps
|
||||
import codeql.ruby.security.performance.RegExpTreeView::RegExpPatterns as RegExpPatterns
|
||||
|
||||
/**
 * Holds if `node` may evaluate to the string `value`, that is, if the
 * expression underlying `node` has `value` as its constant string value.
 */
predicate mayHaveStringValue(DataFlow::Node node, string value) {
  value = node.asExpr().getConstantValue().getString()
}
|
||||
@@ -0,0 +1,9 @@
|
||||
class AppController < ApplicationController
  def index
    url = params[:url]
    # BAD: a substring match on the raw URL does not constrain its host,
    # so the redirect target may still be controlled by an attacker
    redirect_to url if url.include?("example.com")
  end
end
|
||||
@@ -0,0 +1,10 @@
|
||||
class AppController < ApplicationController
  def index
    url = params[:url]
    # BAD: a substring match on the parsed host still accepts hosts chosen
    # by an attacker that merely contain "example.com"
    redirect_to url if URI(url).host.include?("example.com")
  end
end
|
||||
@@ -0,0 +1,15 @@
|
||||
class AppController < ApplicationController
  # Redirects to the user-supplied URL only when its parsed host is exactly
  # one of the explicitly allowed hosts.
  def index
    url = params[:url]
    host = URI(url).host
    # GOOD: the host of `url` cannot be controlled by an attacker,
    # because it must be equal to one of the allowed hosts
    allowed_hosts = [
      'example.com',
      'beta.example.com',
      'www.example.com'
    ]
    if allowed_hosts.include?(host)
      redirect_to url
    end
  end
end
|
||||
Reference in New Issue
Block a user