mirror of
https://github.com/github/codeql.git
synced 2025-12-22 11:46:32 +01:00
Merge remote-tracking branch 'upstream/main' into incomplete-url-string-sanitization
Conflicts: config/identical-files.json javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll ruby/ql/src/queries/security/cwe-020/IncompleteUrlSubstringSanitization.qll
This commit is contained in:
@@ -25,7 +25,7 @@ DataFlow::Node relevantTaintSink(string kind) {
|
||||
or
|
||||
kind = "CommandInjection" and result instanceof CommandInjection::Sink
|
||||
or
|
||||
kind = "XSS" and result instanceof ReflectedXSS::Sink
|
||||
kind = "XSS" and result instanceof ReflectedXss::Sink
|
||||
or
|
||||
kind = "PathInjection" and result instanceof PathInjection::Sink
|
||||
or
|
||||
|
||||
202
ruby/ql/src/queries/security/cwe-020/HostnameRegexpShared.qll
Normal file
202
ruby/ql/src/queries/security/cwe-020/HostnameRegexpShared.qll
Normal file
@@ -0,0 +1,202 @@
|
||||
/**
|
||||
* Provides predicates for reasoning about regular expressions
|
||||
* that match URLs and hostname patterns.
|
||||
*/
|
||||
|
||||
private import HostnameRegexpSpecific
|
||||
|
||||
/**
|
||||
* Holds if the given constant is unlikely to occur in the origin part of a URL.
|
||||
*/
|
||||
predicate isConstantInvalidInsideOrigin(RegExpConstant term) {
|
||||
// Look for any of these cases:
|
||||
// - A character that can't occur in the origin
|
||||
// - Two dashes in a row
|
||||
// - A colon that is not part of port or scheme separator
|
||||
// - A slash that is not part of scheme separator
|
||||
term.getValue().regexpMatch(".*(?:[^a-zA-Z0-9.:/-]|--|:[^0-9/]|(?<![/:]|^)/).*")
|
||||
}
|
||||
|
||||
/** Holds if `term` is a dot constant of form `\.` or `[.]`. */
|
||||
predicate isDotConstant(RegExpTerm term) {
|
||||
term.(RegExpCharEscape).getValue() = "."
|
||||
or
|
||||
exists(RegExpCharacterClass cls |
|
||||
term = cls and
|
||||
not cls.isInverted() and
|
||||
cls.getNumChild() = 1 and
|
||||
cls.getAChild().(RegExpConstant).getValue() = "."
|
||||
)
|
||||
}
|
||||
|
||||
/** Holds if `term` is a wildcard `.` or an actual `.` character. */
|
||||
predicate isDotLike(RegExpTerm term) {
|
||||
term instanceof RegExpDot
|
||||
or
|
||||
isDotConstant(term)
|
||||
}
|
||||
|
||||
/** Holds if `term` will only ever be matched against the beginning of the input. */
|
||||
predicate matchesBeginningOfString(RegExpTerm term) {
|
||||
term.isRootTerm()
|
||||
or
|
||||
exists(RegExpTerm parent | matchesBeginningOfString(parent) |
|
||||
term = parent.(RegExpSequence).getChild(0)
|
||||
or
|
||||
parent.(RegExpSequence).getChild(0) instanceof RegExpCaret and
|
||||
term = parent.(RegExpSequence).getChild(1)
|
||||
or
|
||||
term = parent.(RegExpAlt).getAChild()
|
||||
or
|
||||
term = parent.(RegExpGroup).getAChild()
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the given sequence contains top-level domain preceded by a dot, such as `.com`,
|
||||
* excluding cases where this is at the very beginning of the regexp.
|
||||
*
|
||||
* `i` is bound to the index of the last child in the top-level domain part.
|
||||
*/
|
||||
predicate hasTopLevelDomainEnding(RegExpSequence seq, int i) {
|
||||
seq.getChild(i)
|
||||
.(RegExpConstant)
|
||||
.getValue()
|
||||
.regexpMatch("(?i)" + RegExpPatterns::getACommonTld() + "(:\\d+)?([/?#].*)?") and
|
||||
isDotLike(seq.getChild(i - 1)) and
|
||||
not (i = 1 and matchesBeginningOfString(seq))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the given regular expression term contains top-level domain preceded by a dot,
|
||||
* such as `.com`.
|
||||
*/
|
||||
predicate hasTopLevelDomainEnding(RegExpSequence seq) { hasTopLevelDomainEnding(seq, _) }
|
||||
|
||||
/**
|
||||
* Holds if `term` will always match a hostname, that is, all disjunctions contain
|
||||
* a hostname pattern that isn't inside a quantifier.
|
||||
*/
|
||||
predicate alwaysMatchesHostname(RegExpTerm term) {
|
||||
hasTopLevelDomainEnding(term, _)
|
||||
or
|
||||
// `localhost` is considered a hostname pattern, but has no TLD
|
||||
term.(RegExpConstant).getValue().regexpMatch("\\blocalhost\\b")
|
||||
or
|
||||
not term instanceof RegExpAlt and
|
||||
not term instanceof RegExpQuantifier and
|
||||
alwaysMatchesHostname(term.getAChild())
|
||||
or
|
||||
alwaysMatchesHostnameAlt(term)
|
||||
}
|
||||
|
||||
/** Holds if every child of `alt` contains a hostname pattern. */
|
||||
predicate alwaysMatchesHostnameAlt(RegExpAlt alt) {
|
||||
alwaysMatchesHostnameAlt(alt, alt.getNumChild() - 1)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if the first `i` children of `alt` contains a hostname pattern.
|
||||
*
|
||||
* This is used instead of `forall` to avoid materializing the set of alternatives
|
||||
* that don't contains hostnames, which is much larger.
|
||||
*/
|
||||
predicate alwaysMatchesHostnameAlt(RegExpAlt alt, int i) {
|
||||
alwaysMatchesHostname(alt.getChild(0)) and i = 0
|
||||
or
|
||||
alwaysMatchesHostnameAlt(alt, i - 1) and
|
||||
alwaysMatchesHostname(alt.getChild(i))
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `term` occurs inside a quantifier or alternative (and thus
|
||||
* can not be expected to correspond to a unique match), or as part of
|
||||
* a lookaround assertion (which are rarely used for capture groups).
|
||||
*/
|
||||
predicate isInsideChoiceOrSubPattern(RegExpTerm term) {
|
||||
exists(RegExpParent parent | parent = term.getParent() |
|
||||
parent instanceof RegExpAlt
|
||||
or
|
||||
parent instanceof RegExpQuantifier
|
||||
or
|
||||
parent instanceof RegExpSubPattern
|
||||
or
|
||||
isInsideChoiceOrSubPattern(parent)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `group` is likely to be used as a capture group.
|
||||
*/
|
||||
predicate isLikelyCaptureGroup(RegExpGroup group) {
|
||||
group.isCapture() and
|
||||
not isInsideChoiceOrSubPattern(group)
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `seq` contains two consecutive dots `..` or escaped dots.
|
||||
*
|
||||
* At least one of these dots is not intended to be a subdomain separator,
|
||||
* so we avoid flagging the pattern in this case.
|
||||
*/
|
||||
predicate hasConsecutiveDots(RegExpSequence seq) {
|
||||
exists(int i |
|
||||
isDotLike(seq.getChild(i)) and
|
||||
isDotLike(seq.getChild(i + 1))
|
||||
)
|
||||
}
|
||||
|
||||
predicate isIncompleteHostNameRegExpPattern(RegExpTerm regexp, RegExpSequence seq, string msg) {
|
||||
seq = regexp.getAChild*() and
|
||||
exists(RegExpDot unescapedDot, int i, string hostname |
|
||||
hasTopLevelDomainEnding(seq, i) and
|
||||
not isConstantInvalidInsideOrigin(seq.getChild([0 .. i - 1]).getAChild*()) and
|
||||
not isLikelyCaptureGroup(seq.getChild([i .. seq.getNumChild() - 1]).getAChild*()) and
|
||||
unescapedDot = seq.getChild([0 .. i - 1]).getAChild*() and
|
||||
unescapedDot != seq.getChild(i - 1) and // Should not be the '.' immediately before the TLD
|
||||
not hasConsecutiveDots(unescapedDot.getParent()) and
|
||||
hostname =
|
||||
seq.getChild(i - 2).getRawValue() + seq.getChild(i - 1).getRawValue() +
|
||||
seq.getChild(i).getRawValue()
|
||||
|
|
||||
if unescapedDot.getParent() instanceof RegExpQuantifier
|
||||
then
|
||||
// `.*\.example.com` can match `evil.com/?x=.example.com`
|
||||
//
|
||||
// This problem only occurs when the pattern is applied against a full URL, not just a hostname/origin.
|
||||
// We therefore check if the pattern includes a suffix after the TLD, such as `.*\.example.com/`.
|
||||
// Note that a post-anchored pattern (`.*\.example.com$`) will usually fail to match a full URL,
|
||||
// and patterns with neither a suffix nor an anchor fall under the purview of MissingRegExpAnchor.
|
||||
seq.getChild(0) instanceof RegExpCaret and
|
||||
not seq.getAChild() instanceof RegExpDollar and
|
||||
seq.getChild([i .. i + 1]).(RegExpConstant).getValue().regexpMatch(".*[/?#].*") and
|
||||
msg =
|
||||
"has an unrestricted wildcard '" + unescapedDot.getParent().(RegExpQuantifier).getRawValue()
|
||||
+ "' which may cause '" + hostname +
|
||||
"' to be matched anywhere in the URL, outside the hostname."
|
||||
else
|
||||
msg =
|
||||
"has an unescaped '.' before '" + hostname +
|
||||
"', so it might match more hosts than expected."
|
||||
)
|
||||
}
|
||||
|
||||
predicate incompleteHostnameRegExp(
|
||||
RegExpSequence hostSequence, string message, DataFlow::Node aux, string label
|
||||
) {
|
||||
exists(RegExpPatternSource re, RegExpTerm regexp, string msg, string kind |
|
||||
regexp = re.getRegExpTerm() and
|
||||
isIncompleteHostNameRegExpPattern(regexp, hostSequence, msg) and
|
||||
(
|
||||
if re.getAParse() != re
|
||||
then (
|
||||
kind = "string, which is used as a regular expression $@," and
|
||||
aux = re.getAParse()
|
||||
) else (
|
||||
kind = "regular expression" and aux = re
|
||||
)
|
||||
)
|
||||
|
|
||||
message = "This " + kind + " " + msg and label = "here"
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
import codeql.ruby.security.performance.RegExpTreeView
|
||||
import codeql.ruby.DataFlow
|
||||
@@ -0,0 +1,72 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
|
||||
Sanitizing untrusted URLs is an important technique for
|
||||
preventing attacks such as request forgeries and malicious
|
||||
redirections. Often, this is done by checking that the host of a URL
|
||||
is in a set of allowed hosts.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
If a regular expression implements such a check, it is
|
||||
easy to accidentally make the check too permissive by not escaping the
|
||||
<code>.</code> meta-characters appropriately.
|
||||
|
||||
Even if the check is not used in a security-critical
|
||||
context, the incomplete check may still cause undesirable behaviors
|
||||
when it accidentally succeeds.
|
||||
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
|
||||
Escape all meta-characters appropriately when constructing
|
||||
regular expressions for security checks, and pay special attention to the
|
||||
<code>.</code> meta-character.
|
||||
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
|
||||
<p>
|
||||
|
||||
The following example code checks that a URL redirection
|
||||
will reach the <code>example.com</code> domain, or one of its
|
||||
subdomains.
|
||||
|
||||
</p>
|
||||
|
||||
<sample src="examples/IncompleteHostnameRegExp.rb"/>
|
||||
|
||||
<p>
|
||||
|
||||
The check is however easy to bypass because the unescaped
|
||||
<code>.</code> allows for any character before
|
||||
<code>example.com</code>, effectively allowing the redirect to go to
|
||||
an attacker-controlled domain such as <code>wwwXexample.com</code>.
|
||||
|
||||
</p>
|
||||
<p>
|
||||
|
||||
Address this vulnerability by escaping <code>.</code>
|
||||
appropriately: <code>regex = /^((www|beta)\.)?example\.com/</code>.
|
||||
|
||||
</p>
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* @name Incomplete regular expression for hostnames
|
||||
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.8
|
||||
* @precision high
|
||||
* @id rb/incomplete-hostname-regexp
|
||||
* @tags correctness
|
||||
* security
|
||||
* external/cwe/cwe-020
|
||||
*/
|
||||
|
||||
import HostnameRegexpShared
|
||||
|
||||
query predicate problems = incompleteHostnameRegExp/4;
|
||||
@@ -31,7 +31,7 @@ query predicate problems(
|
||||
(
|
||||
// target contains a domain on a common TLD, and perhaps some other URL components
|
||||
target
|
||||
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::commonTLD() +
|
||||
.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+" + RegExpPatterns::getACommonTld() +
|
||||
"(:[0-9]+)?/?")
|
||||
or
|
||||
// target is a HTTP URL to a domain on any TLD
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
class AppController < ApplicationController
|
||||
|
||||
def index
|
||||
url = params[:url]
|
||||
host = URI(url).host
|
||||
# BAD: the host of `url` may be controlled by an attacker
|
||||
regex = /^((www|beta).)?example.com/
|
||||
if host.match(regex)
|
||||
redirect_to url
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
@@ -18,7 +18,7 @@ import codeql.ruby.security.ReflectedXSSQuery
|
||||
import codeql.ruby.DataFlow
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from ReflectedXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from ReflectedXss::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@.",
|
||||
source.getNode(), "a user-provided value"
|
||||
|
||||
@@ -17,7 +17,7 @@ import codeql.ruby.security.StoredXSSQuery
|
||||
import codeql.ruby.DataFlow
|
||||
import DataFlow::PathGraph
|
||||
|
||||
from StoredXSS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from StoredXss::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "Cross-site scripting vulnerability due to $@",
|
||||
source.getNode(), "stored value"
|
||||
|
||||
@@ -19,8 +19,8 @@ import codeql.ruby.dataflow.RemoteFlowSources
|
||||
import codeql.ruby.TaintTracking
|
||||
import DataFlow::PathGraph
|
||||
|
||||
class SQLInjectionConfiguration extends TaintTracking::Configuration {
|
||||
SQLInjectionConfiguration() { this = "SQLInjectionConfiguration" }
|
||||
class SqlInjectionConfiguration extends TaintTracking::Configuration {
|
||||
SqlInjectionConfiguration() { this = "SQLInjectionConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
|
||||
|
||||
@@ -32,7 +32,7 @@ class SQLInjectionConfiguration extends TaintTracking::Configuration {
|
||||
}
|
||||
}
|
||||
|
||||
from SQLInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
from SqlInjectionConfiguration config, DataFlow::PathNode source, DataFlow::PathNode sink
|
||||
where config.hasFlowPath(source, sink)
|
||||
select sink.getNode(), source, sink, "This SQL query depends on $@.", source.getNode(),
|
||||
"a user-provided value"
|
||||
|
||||
@@ -16,6 +16,6 @@
|
||||
|
||||
import codeql.ruby.security.BadTagFilterQuery
|
||||
|
||||
from HTMLMatchingRegExp regexp, string msg
|
||||
from HtmlMatchingRegExp regexp, string msg
|
||||
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // there might be multiple, we arbitrarily pick the shortest one
|
||||
select regexp, msg
|
||||
|
||||
Reference in New Issue
Block a user