Merge pull request #13660 from geoffw0/regexinjection

Swift: Query for regular expression injection
This commit is contained in:
Geoffrey White
2023-07-18 10:25:30 +01:00
committed by GitHub
10 changed files with 454 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
/**
* Provides classes and predicates to reason about regular expression injection
* vulnerabilities.
*/
import swift
import codeql.swift.dataflow.DataFlow
import codeql.swift.dataflow.ExternalFlow
import codeql.swift.regex.Regex
/**
* A data flow sink for regular expression injection vulnerabilities.
*/
abstract class RegexInjectionSink extends DataFlow::Node { }
/**
* A barrier for regular expression injection vulnerabilities.
*/
abstract class RegexInjectionBarrier extends DataFlow::Node { }
/**
* A unit class for adding additional flow steps.
*/
class RegexInjectionAdditionalFlowStep extends Unit {
/**
* Holds if the step from `node1` to `node2` should be considered a flow
* step for paths related to regular expression injection vulnerabilities.
*/
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
}
/**
* A sink that is a regular expression evaluation defined in the Regex library.
* This includes various methods that consume a regular expression string, but
* in general misses cases where a regular expression string is converted into
* an object (such as a `Regex` or `NSRegularExpression`) for later evaluation.
* These cases are modeled separately.
*/
private class EvalRegexInjectionSink extends RegexInjectionSink {
EvalRegexInjectionSink() { this.asExpr() = any(RegexEval e).getRegexInput() }
}
/**
* A sink that is a regular expression use defined in a CSV model.
*/
private class DefaultRegexInjectionSink extends RegexInjectionSink {
DefaultRegexInjectionSink() { sinkNode(this, "regex-use") }
}
private class RegexInjectionSinks extends SinkModelCsv {
override predicate row(string row) {
row =
[
";Regex;true;init(_:);;;Argument[0];regex-use",
";Regex;true;init(_:as:);;;Argument[0];regex-use",
";NSRegularExpression;true;init(pattern:options:);;;Argument[0];regex-use",
]
}
}

View File

@@ -0,0 +1,30 @@
/**
* Provides a taint-tracking configuration for detecting regular expression
* injection vulnerabilities.
*/
import swift
import codeql.swift.dataflow.DataFlow
import codeql.swift.dataflow.TaintTracking
import codeql.swift.dataflow.FlowSources
import codeql.swift.security.regex.RegexInjectionExtensions
/**
* A taint configuration for detecting regular expression injection vulnerabilities.
*/
module RegexInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof FlowSource }
predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
predicate isBarrier(DataFlow::Node barrier) { barrier instanceof RegexInjectionBarrier }
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
any(RegexInjectionAdditionalFlowStep s).step(nodeFrom, nodeTo)
}
}
/**
* Detect taint flow of tainted data that reaches a regular expression sink.
*/
module RegexInjectionFlow = TaintTracking::Global<RegexInjectionConfig>;

View File

@@ -0,0 +1,4 @@
---
category: newQuery
---
* Added new query "Regular expression injection" (`swift/regex-injection`). The query finds places where user input is used to construct a regular expression without proper escaping.

View File

@@ -0,0 +1,50 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
Constructing a regular expression with unsanitized user input is dangerous,
since a malicious user may be able to modify the meaning of the expression. They
may be able to cause unexpected program behaviour, or perform a denial-of-service
attack. For example, they may provide a regular expression fragment that takes
exponential time to evaluate in the worst case.
</p>
</overview>
<recommendation>
<p>
Before embedding user input into a regular expression, use a sanitization
function such as <code>NSRegularExpression::escapedPattern(for:)</code> to escape
meta-characters that have special meaning.
</p>
</recommendation>
<example>
<p>
The following examples construct regular expressions from user input without
sanitizing it first:
</p>
<sample src="RegexInjectionBad.swift" />
<p>
If user input is used to construct a regular expression it should be sanitized
first. This ensures that the user cannot insert characters that have special
meanings in regular expressions.
</p>
<sample src="RegexInjectionGood.swift" />
</example>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS">Regular expression Denial of Service - ReDoS</a>.
</li>
<li>
Wikipedia: <a href="https://en.wikipedia.org/wiki/ReDoS">ReDoS</a>.
</li>
<li>
Swift: <a href="https://developer.apple.com/documentation/foundation/nsregularexpression/1408386-escapedpattern">NSRegularExpression.escapedPattern(for:)</a>.
</li>
</references>
</qhelp>

View File

@@ -0,0 +1,24 @@
/**
* @name Regular expression injection
* @description User input should not be used in regular expressions without first being escaped,
* otherwise a malicious user may be able to provide a regex that could require
* exponential time on certain inputs.
* @kind path-problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id swift/regex-injection
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/
import swift
import codeql.swift.dataflow.DataFlow
import codeql.swift.security.regex.RegexInjectionQuery
import RegexInjectionFlow::PathGraph
from RegexInjectionFlow::PathNode sourceNode, RegexInjectionFlow::PathNode sinkNode
where RegexInjectionFlow::flowPath(sourceNode, sinkNode)
select sinkNode.getNode(), sourceNode, sinkNode,
"This regular expression is constructed from a $@.", sourceNode.getNode(), "user-provided value"

View File

@@ -0,0 +1,12 @@
func processRemoteInput(remoteInput: String) {
...
// BAD: Unsanitized user input is used to construct a regular expression
let regex1 = try Regex(remoteInput)
// BAD: Unsanitized user input is used to construct a regular expression
let regexStr = "abc|\(remoteInput)"
let regex2 = try NSRegularExpression(pattern: regexStr)
...
}

View File

@@ -0,0 +1,13 @@
func processRemoteInput(remoteInput: String) {
...
// GOOD: Regular expression is not derived from user input
let regex1 = try Regex(myRegex)
// GOOD: User input is sanitized before being used to construct a regular expression
let escapedInput = NSRegularExpression.escapedPattern(for: remoteInput)
let regexStr = "abc|\(escapedInput)"
let regex2 = try NSRegularExpression(pattern: regexStr)
...
}

View File

@@ -0,0 +1,68 @@
edges
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:101:16:101:16 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:104:16:104:40 | ... .+(_:_:) ... |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:106:16:106:16 | "..." |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:109:16:109:39 | ... ? ... : ... |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:110:16:110:37 | ... ? ... : ... |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:113:24:113:24 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:114:45:114:45 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:120:19:120:19 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:131:39:131:39 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:144:16:144:16 | remoteInput |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:147:39:147:39 | regexStr |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:162:17:162:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:164:17:164:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:167:17:167:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:170:17:170:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:173:17:173:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:176:17:176:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:179:17:179:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:182:17:182:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:185:17:185:17 | taintedString |
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:190:21:190:21 | taintedString |
nodes
| tests.swift:95:22:95:46 | call to String.init(contentsOf:) | semmle.label | call to String.init(contentsOf:) |
| tests.swift:101:16:101:16 | taintedString | semmle.label | taintedString |
| tests.swift:104:16:104:40 | ... .+(_:_:) ... | semmle.label | ... .+(_:_:) ... |
| tests.swift:106:16:106:16 | "..." | semmle.label | "..." |
| tests.swift:109:16:109:39 | ... ? ... : ... | semmle.label | ... ? ... : ... |
| tests.swift:110:16:110:37 | ... ? ... : ... | semmle.label | ... ? ... : ... |
| tests.swift:113:24:113:24 | taintedString | semmle.label | taintedString |
| tests.swift:114:45:114:45 | taintedString | semmle.label | taintedString |
| tests.swift:120:19:120:19 | taintedString | semmle.label | taintedString |
| tests.swift:131:39:131:39 | taintedString | semmle.label | taintedString |
| tests.swift:144:16:144:16 | remoteInput | semmle.label | remoteInput |
| tests.swift:147:39:147:39 | regexStr | semmle.label | regexStr |
| tests.swift:162:17:162:17 | taintedString | semmle.label | taintedString |
| tests.swift:164:17:164:17 | taintedString | semmle.label | taintedString |
| tests.swift:167:17:167:17 | taintedString | semmle.label | taintedString |
| tests.swift:170:17:170:17 | taintedString | semmle.label | taintedString |
| tests.swift:173:17:173:17 | taintedString | semmle.label | taintedString |
| tests.swift:176:17:176:17 | taintedString | semmle.label | taintedString |
| tests.swift:179:17:179:17 | taintedString | semmle.label | taintedString |
| tests.swift:182:17:182:17 | taintedString | semmle.label | taintedString |
| tests.swift:185:17:185:17 | taintedString | semmle.label | taintedString |
| tests.swift:190:21:190:21 | taintedString | semmle.label | taintedString |
subpaths
#select
| tests.swift:101:16:101:16 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:101:16:101:16 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:104:16:104:40 | ... .+(_:_:) ... | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:104:16:104:40 | ... .+(_:_:) ... | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:106:16:106:16 | "..." | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:106:16:106:16 | "..." | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:109:16:109:39 | ... ? ... : ... | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:109:16:109:39 | ... ? ... : ... | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:110:16:110:37 | ... ? ... : ... | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:110:16:110:37 | ... ? ... : ... | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:113:24:113:24 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:113:24:113:24 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:114:45:114:45 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:114:45:114:45 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:120:19:120:19 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:120:19:120:19 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:131:39:131:39 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:131:39:131:39 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:144:16:144:16 | remoteInput | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:144:16:144:16 | remoteInput | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:147:39:147:39 | regexStr | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:147:39:147:39 | regexStr | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:162:17:162:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:162:17:162:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:164:17:164:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:164:17:164:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:167:17:167:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:167:17:167:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:170:17:170:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:170:17:170:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:173:17:173:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:173:17:173:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:176:17:176:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:176:17:176:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:179:17:179:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:179:17:179:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:182:17:182:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:182:17:182:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:185:17:185:17 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:185:17:185:17 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |
| tests.swift:190:21:190:21 | taintedString | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | tests.swift:190:21:190:21 | taintedString | This regular expression is constructed from a $@. | tests.swift:95:22:95:46 | call to String.init(contentsOf:) | user-provided value |

View File

@@ -0,0 +1 @@
queries/Security/CWE-730/RegexInjection.ql

View File

@@ -0,0 +1,193 @@
// --- stubs ---
struct URL {
init?(string: String) {}
}
struct AnyRegexOutput {
}
protocol RegexComponent<RegexOutput> {
associatedtype RegexOutput
}
struct Regex<Output> : RegexComponent {
struct Match {
}
init(_ pattern: String) throws where Output == AnyRegexOutput { }
func firstMatch(in string: String) throws -> Regex<Output>.Match? { return nil}
typealias RegexOutput = Output
}
extension RangeReplaceableCollection {
mutating func replace<Replacement>(_ regex: some RegexComponent, with replacement: Replacement, maxReplacements: Int = .max) where Replacement : Collection, Replacement.Element == Character { }
}
extension StringProtocol {
func replacingOccurrences<Target, Replacement>(of target: Target, with replacement: Replacement, options: String.CompareOptions = [], range searchRange: Range<Self.Index>? = nil) -> String where Target : StringProtocol, Replacement : StringProtocol { return "" }
}
extension String : RegexComponent {
typealias CompareOptions = NSString.CompareOptions
typealias Output = Substring
typealias RegexOutput = String.Output
}
class NSObject {
}
class NSString : NSObject {
struct CompareOptions : OptionSet {
var rawValue: UInt
static var regularExpression: NSString.CompareOptions { get { return CompareOptions(rawValue: 1) } }
}
convenience init(string aString: String) { self.init() }
func replacingOccurrences(of target: String, with replacement: String, options: NSString.CompareOptions = [], range searchRange: NSRange) -> String { return "" }
var length: Int { get { return 0 } }
}
struct _NSRange {
init(location: Int, length: Int) { }
}
typealias NSRange = _NSRange
func NSMakeRange(_ loc: Int, _ len: Int) -> NSRange { return NSRange(location: loc, length: len) }
class NSTextCheckingResult : NSObject {
}
class NSRegularExpression : NSObject {
struct Options : OptionSet {
var rawValue: UInt
}
struct MatchingOptions : OptionSet {
var rawValue: UInt
}
init(pattern: String, options: NSRegularExpression.Options = []) throws { }
func firstMatch(in string: String, options: NSRegularExpression.MatchingOptions = [], range: NSRange) -> NSTextCheckingResult? { return nil }
class func escapedPattern(for string: String) -> String { return "" }
}
extension String {
init(contentsOf: URL) {
let data = ""
self.init(data)
}
}
// --- tests ---
func regexInjectionTests(cond: Bool, varString: String, myUrl: URL) throws {
let constString = ".*"
let taintedString = String(contentsOf: myUrl) // tainted
// --- Regex ---
_ = try Regex(constString).firstMatch(in: varString)
_ = try Regex(varString).firstMatch(in: varString)
_ = try Regex(taintedString).firstMatch(in: varString) // BAD
_ = try Regex("(a|" + constString + ")").firstMatch(in: varString)
_ = try Regex("(a|" + taintedString + ")").firstMatch(in: varString) // BAD
_ = try Regex("(a|\(constString))").firstMatch(in: varString)
_ = try Regex("(a|\(taintedString))").firstMatch(in: varString) // BAD
_ = try Regex(cond ? constString : constString).firstMatch(in: varString)
_ = try Regex(cond ? taintedString : constString).firstMatch(in: varString) // BAD
_ = try Regex(cond ? constString : taintedString).firstMatch(in: varString) // BAD
_ = try (cond ? Regex(constString) : Regex(constString)).firstMatch(in: varString)
_ = try (cond ? Regex(taintedString) : Regex(constString)).firstMatch(in: varString) // BAD
_ = try (cond ? Regex(constString) : Regex(taintedString)).firstMatch(in: varString) // BAD
// --- RangeReplaceableCollection ---
var inputVar = varString
inputVar.replace(constString, with: "")
inputVar.replace(taintedString, with: "") // BAD
inputVar.replace(constString, with: taintedString)
// --- StringProtocol ---
_ = inputVar.replacingOccurrences(of: constString, with: "", options: .regularExpression)
_ = inputVar.replacingOccurrences(of: taintedString, with: "", options: .regularExpression) // BAD [NOT DETECTED]
// --- NSRegularExpression ---
_ = try NSRegularExpression(pattern: constString).firstMatch(in: varString, range: NSMakeRange(0, varString.utf16.count))
_ = try NSRegularExpression(pattern: taintedString).firstMatch(in: varString, range: NSMakeRange(0, varString.utf16.count)) // BAD
// --- NSString ---
let nsString = NSString(string: varString)
_ = nsString.replacingOccurrences(of: constString, with: "", options: .regularExpression, range: NSMakeRange(0, nsString.length))
_ = nsString.replacingOccurrences(of: taintedString, with: "", options: .regularExpression, range: NSMakeRange(0, nsString.length)) // BAD [NOT DETECTED]
// --- from the qhelp ---
let remoteInput = taintedString
let myRegex = ".*"
_ = try Regex(remoteInput) // BAD
let regexStr = "abc|\(remoteInput)"
_ = try NSRegularExpression(pattern: regexStr) // BAD
_ = try Regex(myRegex)
let escapedInput = NSRegularExpression.escapedPattern(for: remoteInput)
let regexStr4 = "abc|\(escapedInput)"
_ = try NSRegularExpression(pattern: regexStr4)
// --- barriers ---
let okInput = "abc"
let okInputs = ["abc", "def"]
let okSet: Set = ["abc", "def"]
if (taintedString == okInput) {
_ = try Regex(taintedString).firstMatch(in: varString) // GOOD (effectively sanitized by the check) [FALSE POSITIVE]
} else {
_ = try Regex(taintedString).firstMatch(in: varString) // BAD
}
if (taintedString != okInput) {
_ = try Regex(taintedString).firstMatch(in: varString) // BAD
}
if (varString == okInput) {
_ = try Regex(taintedString).firstMatch(in: varString) // BAD
}
if (okInputs.contains(taintedString)) {
_ = try Regex(taintedString).firstMatch(in: varString) // GOOD (effectively sanitized by the check) [FALSE POSITIVE]
}
if (okInputs.firstIndex(of: taintedString) != nil) {
_ = try Regex(taintedString).firstMatch(in: varString) // GOOD (effectively sanitized by the check) [FALSE POSITIVE]
}
if let index = okInputs.firstIndex(of: taintedString) {
_ = try Regex(taintedString).firstMatch(in: varString) // GOOD (effectively sanitized by the check) [FALSE POSITIVE]
}
if let index = okInputs.index(of: taintedString) {
_ = try Regex(taintedString).firstMatch(in: varString) // GOOD (effectively sanitized by the check) [FALSE POSITIVE]
}
if (okSet.contains(taintedString)) {
_ = try Regex(taintedString).firstMatch(in: varString) // GOOD (effectively sanitized by the check) [FALSE POSITIVE]
}
// --- multiple evaluations ---
let re = try Regex(taintedString) // BAD
_ = try re.firstMatch(in: varString) // (we only want to flag one location total)
_ = try re.firstMatch(in: varString)
}