mirror of
https://github.com/github/codeql.git
synced 2025-12-16 16:53:25 +01:00
Merge pull request #14034 from geoffw0/hostname
Swift: New query: Incomplete regular expression for hostnames
This commit is contained in:
@@ -8,6 +8,35 @@ private import codeql.swift.dataflow.DataFlow
|
||||
private import internal.ParseRegex
|
||||
private import internal.RegexTracking
|
||||
|
||||
/**
|
||||
* A data flow node whose value may flow to a position where it is interpreted
|
||||
* as a part of a regular expression. For example the string literal
|
||||
* `"(a|b).*"` in:
|
||||
* ```
|
||||
* Regex("(a|b).*").firstMatch(in: myString)
|
||||
* ```
|
||||
*/
|
||||
abstract class RegexPatternSource extends DataFlow::Node {
|
||||
/**
|
||||
* Gets a node where the pattern of this node is parsed as a part of
|
||||
* a regular expression.
|
||||
*/
|
||||
abstract DataFlow::Node getAParse();
|
||||
|
||||
/**
|
||||
* Gets the root term of the regular expression parsed from this pattern.
|
||||
*/
|
||||
abstract RegExpTerm getRegExpTerm();
|
||||
}
|
||||
|
||||
/**
|
||||
* For each `RegexPatternSource` data flow node, the corresponding `Expr` is
|
||||
* a `Regex`. This is a simple wrapper to make that happen.
|
||||
*/
|
||||
private class RegexFromRegexPatternSource extends RegExp {
|
||||
RegexFromRegexPatternSource() { this = any(RegexPatternSource node).asExpr() }
|
||||
}
|
||||
|
||||
/**
|
||||
* A string literal that is used as a regular expression. For example
|
||||
* the string literal `"(a|b).*"` in:
|
||||
@@ -15,16 +44,18 @@ private import internal.RegexTracking
|
||||
* Regex("(a|b).*").firstMatch(in: myString)
|
||||
* ```
|
||||
*/
|
||||
private class ParsedStringRegex extends RegExp, StringLiteralExpr {
|
||||
private class ParsedStringRegex extends RegexPatternSource {
|
||||
StringLiteralExpr expr;
|
||||
DataFlow::Node use;
|
||||
|
||||
ParsedStringRegex() { StringLiteralUseFlow::flow(DataFlow::exprNode(this), use) }
|
||||
ParsedStringRegex() {
|
||||
expr = this.asExpr() and
|
||||
StringLiteralUseFlow::flow(this, use)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a dataflow node where this string literal is used as a regular
|
||||
* expression.
|
||||
*/
|
||||
DataFlow::Node getUse() { result = use }
|
||||
override DataFlow::Node getAParse() { result = use }
|
||||
|
||||
override RegExpTerm getRegExpTerm() { result.getRegExp() = expr }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -246,11 +277,11 @@ abstract class RegexEval extends CallExpr {
|
||||
*/
|
||||
RegExp getARegex() {
|
||||
// string literal used directly as a regex
|
||||
result.(ParsedStringRegex).getUse().asExpr() = this.getRegexInput()
|
||||
DataFlow::exprNode(result).(ParsedStringRegex).getAParse().asExpr() = this.getRegexInput()
|
||||
or
|
||||
// string literal -> regex object -> use
|
||||
exists(RegexCreation regexCreation |
|
||||
result.(ParsedStringRegex).getUse() = regexCreation.getStringInput() and
|
||||
DataFlow::exprNode(result).(ParsedStringRegex).getAParse() = regexCreation.getStringInput() and
|
||||
RegexUseFlow::flow(regexCreation, DataFlow::exprNode(this.getRegexInput()))
|
||||
)
|
||||
}
|
||||
|
||||
21
swift/ql/lib/codeql/swift/security/regex/HostnameRegex.qll
Normal file
21
swift/ql/lib/codeql/swift/security/regex/HostnameRegex.qll
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Provides predicates for reasoning about regular expressions
|
||||
* that match URLs and hostname patterns.
|
||||
*/
|
||||
|
||||
private import swift
|
||||
private import codeql.swift.dataflow.DataFlow
|
||||
private import codeql.swift.regex.Regex as Regex
|
||||
private import codeql.swift.regex.RegexTreeView::RegexTreeView as TreeImpl
|
||||
private import codeql.regex.HostnameRegexp as Shared
|
||||
|
||||
/**
|
||||
* An implementation of the signature that allows the Hostname analysis to run.
|
||||
*/
|
||||
private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
|
||||
class DataFlowNode = DataFlow::Node;
|
||||
|
||||
class RegExpPatternSource = Regex::RegexPatternSource;
|
||||
}
|
||||
|
||||
import Shared::Make<TreeImpl, Impl>
|
||||
@@ -0,0 +1,5 @@
|
||||
---
|
||||
category: newQuery
|
||||
---
|
||||
|
||||
* Added new query "Incomplete regular expression for hostnames" (`swift/incomplete-hostname-regexp`). This query finds regular expressions matching a URL or hostname that may match more hostnames than expected.
|
||||
@@ -0,0 +1,74 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
|
||||
Sanitizing untrusted URLs is an important technique for
|
||||
preventing attacks such as request forgeries and malicious
|
||||
redirections. Often, this is done by checking that the host of a URL
|
||||
is in a set of allowed hosts.
|
||||
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
||||
If a regular expression implements such a check, it is
|
||||
easy to accidentally make the check too permissive by not escaping the
|
||||
<code>.</code> meta-characters appropriately.
|
||||
|
||||
Even if the check is not used in a security-critical
|
||||
context, the incomplete check may still cause undesirable behaviors
|
||||
when it accidentally succeeds.
|
||||
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
|
||||
Escape all meta-characters appropriately when constructing
|
||||
regular expressions for security checks, and pay special attention to the
|
||||
<code>.</code> meta-character.
|
||||
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
|
||||
<p>
|
||||
|
||||
The following example code checks that a URL redirection
|
||||
will reach the <code>example.com</code> domain, or one of its
|
||||
subdomains.
|
||||
|
||||
</p>
|
||||
|
||||
<sample src="IncompleteHostnameRegexBad.swift"/>
|
||||
|
||||
<p>
|
||||
|
||||
The check is, however, easy to bypass because the unescaped
|
||||
<code>.</code> allows for any character before
|
||||
<code>example.com</code>, effectively allowing the redirect to go to
|
||||
an attacker-controlled domain such as <code>wwwXexample.com</code>.
|
||||
|
||||
</p>
|
||||
<p>
|
||||
|
||||
Address this vulnerability by escaping <code>.</code>
|
||||
to <code>\.</code>:
|
||||
|
||||
</p>
|
||||
|
||||
<sample src="IncompleteHostnameRegexGood.swift"/>
|
||||
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">Server Side Request Forgery</a>.</li>
|
||||
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* @name Incomplete regular expression for hostnames
|
||||
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname may match more hostnames than expected.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.8
|
||||
* @precision high
|
||||
* @id swift/incomplete-hostname-regexp
|
||||
* @tags correctness
|
||||
* security
|
||||
* external/cwe/cwe-020
|
||||
*/
|
||||
|
||||
private import codeql.swift.security.regex.HostnameRegex as HostnameRegex
|
||||
|
||||
query predicate problems = HostnameRegex::incompleteHostnameRegExp/4;
|
||||
@@ -0,0 +1,12 @@
|
||||
|
||||
func handleUrl(_ urlString: String) {
|
||||
// get the 'url=' parameter from the URL
|
||||
let components = URLComponents(string: urlString)
|
||||
let redirectParam = components?.queryItems?.first(where: { $0.name == "url" })
|
||||
|
||||
// check we trust the host
|
||||
let regex = #/^(www|beta).example.com//# // BAD
|
||||
if let match = redirectParam?.value?.firstMatch(of: regex) {
|
||||
// ... trust the URL ...
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
|
||||
func handleUrl(_ urlString: String) {
|
||||
// get the 'url=' parameter from the URL
|
||||
let components = URLComponents(string: urlString)
|
||||
let redirectParam = components?.queryItems?.first(where: { $0.name == "url" })
|
||||
|
||||
// check we trust the host
|
||||
let regex = #/^(www|beta)\.example\.com//# // GOOD
|
||||
if let match = redirectParam?.value?.firstMatch(of: regex) {
|
||||
// ... trust the URL ...
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
| test.swift:60:17:60:40 | ^http://test.example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:60:16:60:16 | ^http://test.example.com/ | here |
|
||||
| test.swift:63:17:63:40 | ^http://test.example.net/ | This regular expression has an unescaped '.' before 'example.net/', so it might match more hosts than expected. | test.swift:63:16:63:16 | ^http://test.example.net/ | here |
|
||||
| test.swift:64:17:64:54 | ^http://test.(example-a\|example-b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com/', so it might match more hosts than expected. | test.swift:64:16:64:16 | ^http://test.(example-a\|example-b).com/ | here |
|
||||
| test.swift:65:17:65:40 | ^http://(.+).example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:65:16:65:16 | ^http://(.+).example.com/ | here |
|
||||
| test.swift:65:17:65:40 | ^http://(.+).example.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com/' to be matched anywhere in the URL, outside the hostname. | test.swift:65:16:65:16 | ^http://(.+).example.com/ | here |
|
||||
| test.swift:67:17:67:49 | ^http://(?:.+)\\.test\\.example.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com/' to be matched anywhere in the URL, outside the hostname. | test.swift:67:16:67:16 | ^http://(?:.+)\\.test\\.example.com/ | here |
|
||||
| test.swift:68:17:68:46 | ^http://test.example.com/(?:.*) | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:68:16:68:16 | ^http://test.example.com/(?:.*) | here |
|
||||
| test.swift:70:17:70:63 | ^(https?:)?//((service\|www).)?example.com(?=$\|/) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:70:16:70:16 | ^(https?:)?//((service\|www).)?example.com(?=$\|/) | here |
|
||||
| test.swift:71:17:71:51 | ^(http\|https)://www.example.com/p/f/ | This regular expression has an unescaped '.' before 'example.com/p/f/', so it might match more hosts than expected. | test.swift:71:16:71:16 | ^(http\|https)://www.example.com/p/f/ | here |
|
||||
| test.swift:72:19:72:40 | http://sub.example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:72:16:72:16 | ^(http://sub.example.com/) | here |
|
||||
| test.swift:73:17:73:41 | ^https?://api.example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:73:16:73:16 | ^https?://api.example.com/ | here |
|
||||
| test.swift:75:17:75:43 | ^https://[a-z]*.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:75:16:75:16 | ^https://[a-z]*.example.com$ | here |
|
||||
| test.swift:77:39:77:51 | .+.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | test.swift:77:16:77:16 | ^protos?://(localhost\|.+.example.net\|.+.example-a.com\|.+.example-b.com\|.+.example.internal) | here |
|
||||
| test.swift:77:54:77:68 | .+.example-a.com | This regular expression has an unescaped '.' before 'example-a.com', so it might match more hosts than expected. | test.swift:77:16:77:16 | ^protos?://(localhost\|.+.example.net\|.+.example-a.com\|.+.example-b.com\|.+.example.internal) | here |
|
||||
| test.swift:77:71:77:85 | .+.example-b.com | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | test.swift:77:16:77:16 | ^protos?://(localhost\|.+.example.net\|.+.example-a.com\|.+.example-b.com\|.+.example.internal) | here |
|
||||
| test.swift:81:19:81:33 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | test.swift:81:16:81:16 | ^(foo.example\\.com\|whatever)$ | here |
|
||||
| test.swift:83:17:83:33 | ^test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:83:16:83:16 | ^test.example.com$ | here |
|
||||
| test.swift:84:17:84:31 | test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:84:16:84:16 | test.example.com | here |
|
||||
| test.swift:86:26:86:41 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:86:16:86:48 | call to id(_:) | here |
|
||||
| test.swift:92:21:92:36 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:93:16:93:23 | .hostname | here |
|
||||
| test.swift:98:29:98:44 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:96:20:96:27 | .hostname | here |
|
||||
@@ -0,0 +1 @@
|
||||
queries/Security/CWE-020/IncompleteHostnameRegex.ql
|
||||
113
swift/ql/test/query-tests/Security/CWE-020/test.swift
Normal file
113
swift/ql/test/query-tests/Security/CWE-020/test.swift
Normal file
@@ -0,0 +1,113 @@
|
||||
|
||||
// --- stubs ---
|
||||
|
||||
struct URL {
|
||||
init?(string: String) {}
|
||||
}
|
||||
|
||||
extension String {
|
||||
init(contentsOf: URL) {
|
||||
let data = ""
|
||||
self.init(data)
|
||||
}
|
||||
}
|
||||
|
||||
struct AnyRegexOutput {
|
||||
}
|
||||
|
||||
protocol RegexComponent<RegexOutput> {
|
||||
associatedtype RegexOutput
|
||||
}
|
||||
|
||||
struct Regex<Output> : RegexComponent {
|
||||
struct Match {
|
||||
}
|
||||
|
||||
init(_ pattern: String) throws where Output == AnyRegexOutput { }
|
||||
|
||||
func firstMatch(in string: String) throws -> Regex<Output>.Match? { return nil}
|
||||
func wholeMatch(in string: String) throws -> Regex<Output>.Match? { return nil }
|
||||
|
||||
typealias RegexOutput = Output
|
||||
}
|
||||
|
||||
extension String : RegexComponent {
|
||||
typealias Output = Substring
|
||||
typealias RegexOutput = String.Output
|
||||
}
|
||||
|
||||
// --- tests ---
|
||||
|
||||
func id(_ s : String) -> String { return s }
|
||||
|
||||
struct MyDomain {
|
||||
init(_ hostname: String) {
|
||||
self.hostname = hostname
|
||||
}
|
||||
|
||||
var hostname: String
|
||||
}
|
||||
|
||||
func testHostnames(myUrl: URL) throws {
|
||||
let tainted = String(contentsOf: myUrl) // tainted
|
||||
|
||||
_ = try Regex(#"^http://example\.com/"#).firstMatch(in: tainted) // GOOD
|
||||
_ = try Regex(#"^http://example.com/"#).firstMatch(in: tainted) // GOOD (only '.' here gives a valid top-level domain)
|
||||
_ = try Regex(#"^http://example.com"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
|
||||
_ = try Regex(#"^http://test\.example\.com/"#).firstMatch(in: tainted) // GOOD
|
||||
_ = try Regex(#"^http://test\.example.com/"#).firstMatch(in: tainted) // GOOD (only '.' here gives a valid top-level domain)
|
||||
_ = try Regex(#"^http://test\.example.com"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
|
||||
_ = try Regex(#"^http://test.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^http://test[.]example[.]com/"#).firstMatch(in: tainted) // GOOD (alternative method of escaping)
|
||||
|
||||
_ = try Regex(#"^http://test.example.net/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^http://test.(example-a|example-b).com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^http://(.+).example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname x 2)
|
||||
_ = try Regex(#"^http://(\.+)\.example.com/"#).firstMatch(in: tainted) // GOOD
|
||||
_ = try Regex(#"^http://(?:.+)\.test\.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^http://test.example.com/(?:.*)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^(.+\.(?:example-a|example-b)\.com)/"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
|
||||
_ = try Regex(#"^(https?:)?//((service|www).)?example.com(?=$|/)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^(http|https)://www.example.com/p/f/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^(http://sub.example.com/)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^https?://api.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^http[s]?://?sub1\.sub2\.example\.com/f/(.+)"#).firstMatch(in: tainted) // GOOD (it has a capture group after the TLD, so should be ignored)
|
||||
_ = try Regex(#"^https://[a-z]*.example.com$"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"^(example.dev|example.com)"#).firstMatch(in: tainted) // GOOD (any extended hostname wouldn't be included in the capture group)
|
||||
_ = try Regex(#"^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)"#).firstMatch(in: tainted) // BAD (incomplete hostname x3, missing anchor x 1)
|
||||
|
||||
_ = try Regex(#"^http://(..|...)\.example\.com/index\.html"#).firstMatch(in: tainted) // GOOD (wildcards are intentional)
|
||||
_ = try Regex(#"^http://.\.example\.com/index\.html"#).firstMatch(in: tainted) // GOOD (the wildcard is intentional)
|
||||
_ = try Regex(#"^(foo.example\.com|whatever)$"#).firstMatch(in: tainted) // DUBIOUS (one disjunction doesn't even look like a hostname) [DETECTED incomplete hostname]
|
||||
|
||||
_ = try Regex(#"^test.example.com$"#).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
_ = try Regex(#"test.example.com"#).wholeMatch(in: tainted) // BAD (incomplete hostname, missing anchor)
|
||||
|
||||
_ = try Regex(id(id(id(#"test.example.com$"#)))).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
|
||||
let hostname = #"test.example.com$"# // BAD (incomplete hostname) [NOT DETECTED]
|
||||
_ = try Regex("\(hostname)").firstMatch(in: tainted)
|
||||
|
||||
var domain = MyDomain("")
|
||||
domain.hostname = #"test.example.com$"# // BAD (incomplete hostname)
|
||||
_ = try Regex(domain.hostname).firstMatch(in: tainted)
|
||||
|
||||
func convert1(_ domain: MyDomain) throws -> Regex<AnyRegexOutput> {
|
||||
return try Regex(domain.hostname)
|
||||
}
|
||||
_ = try convert1(MyDomain(#"test.example.com$"#)).firstMatch(in: tainted) // BAD (incomplete hostname)
|
||||
|
||||
let domains = [ MyDomain(#"test.example.com$"#) ] // BAD (incomplete hostname) [NOT DETECTED]
|
||||
func convert2(_ domain: MyDomain) throws -> Regex<AnyRegexOutput> {
|
||||
return try Regex(domain.hostname)
|
||||
}
|
||||
_ = try domains.map({ try convert2($0).firstMatch(in: tainted) })
|
||||
|
||||
let primary = "example.com$"
|
||||
_ = try Regex("test." + primary).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
|
||||
_ = try Regex("test." + "example.com$").firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
|
||||
_ = try Regex(#"^http://localhost:8000|" + "^https?://.+\.example\.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
|
||||
_ = try Regex(#"^http://localhost:8000|" + "^https?://.+.example\.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
|
||||
|
||||
let harmless = #"^http://test.example.com"# // GOOD (never used as a regex)
|
||||
}
|
||||
Reference in New Issue
Block a user