// --- stubs ---
//
// Minimal stand-ins for the Foundation / Swift regex declarations that the
// tests below reference. Every body is a no-op; only the signatures matter.

/// Stub URL type; the failable initializer has an empty body.
struct URL {
    init?(string: String) {}
}

extension String {
    /// Stub initializer taking a URL; it ignores the argument and always
    /// produces the empty string.
    init(contentsOf: URL) {
        let data = ""
        self.init(data)
    }
}

/// Stub for the type-erased regex output type.
struct AnyRegexOutput {
}

/// Stub protocol for regex components, carrying only the output type.
protocol RegexComponent {
    associatedtype RegexOutput
}

/// Stub regex type, generic over its match output.
///
/// The generic parameter is required: `Output` is referenced by the
/// initializer's `where` clause and by the `RegexOutput` alias.
struct Regex<Output> : RegexComponent {
    /// Stub match result.
    struct Match {
    }

    /// Builds a regex from a pattern string; the stub never throws.
    init(_ pattern: String) throws where Output == AnyRegexOutput { }

    /// Stub: returns `self` unchanged regardless of the flag.
    func ignoresCase(_ ignoresCase: Bool = true) -> Regex<Regex<Output>.RegexOutput> { return self }

    /// Stub: returns `self` unchanged regardless of the flag.
    func dotMatchesNewlines(_ dotMatchesNewlines: Bool = true) -> Regex<Regex<Output>.RegexOutput> { return self }

    /// Stub: always returns `nil`.
    func firstMatch(in string: String) throws -> Regex<Output>.Match? { return nil }

    typealias RegexOutput = Output
}

extension String : RegexComponent {
    typealias Output = Substring
    typealias RegexOutput = String.Output
}

/// Stub root class for the Foundation-style types below.
class NSObject {
}

/// Stub range type; the initializer discards both values.
struct _NSRange {
    init(location: Int, length: Int) { }
}

typealias NSRange = _NSRange

/// Convenience constructor that forwards to `NSRange(location:length:)`.
func NSMakeRange(_ loc: Int, _ len: Int) -> NSRange {
    return NSRange(location: loc, length: len)
}

/// Stub match-result class.
class NSTextCheckingResult : NSObject {
}

/// Stub regular-expression class; its matching methods never find a match.
class NSRegularExpression : NSObject {
    /// Compile-time options. Only the two flags used by the tests are declared.
    struct Options : OptionSet {
        var rawValue: UInt

        static var caseInsensitive: NSRegularExpression.Options {
            get { return Options(rawValue: 1 << 0) }
        }

        static var dotMatchesLineSeparators: NSRegularExpression.Options {
            get { return Options(rawValue: 1 << 1) }
        }
    }

    /// Match-time options; no individual flags are declared in this stub.
    struct MatchingOptions : OptionSet {
        var rawValue: UInt
    }

    /// Builds a regex from a pattern and options; the stub never throws.
    init(pattern: String, options: NSRegularExpression.Options = []) throws { }

    /// Stub: always returns an empty array.
    func matches(in string: String, options: NSRegularExpression.MatchingOptions = [], range: NSRange) -> [NSTextCheckingResult] { return [] }

    /// Stub: the body on the following line always returns `nil`.
    func firstMatch(in string: String, options: NSRegularExpression.MatchingOptions = [], range: NSRange) -> NSTextCheckingResult?
{ return nil } } // --- tests --- func myRegexpVariantsTests(myUrl: URL) throws { let tainted = String(contentsOf: myUrl) // tainted // BAD - doesn't match newlines or `` let re1 = try Regex(#".*?<\/script>"#).ignoresCase(true) _ = try re1.firstMatch(in: tainted) // BAD - doesn't match `` let re2a = try Regex(#"(?is).*?<\/script>"#) _ = try re2a.firstMatch(in: tainted) // BAD - doesn't match `` let re2b = try Regex(#".*?<\/script>"#).ignoresCase(true).dotMatchesNewlines(true) _ = try re2b.firstMatch(in: tainted) // BAD - doesn't match `` let options2c: NSRegularExpression.Options = [.caseInsensitive, .dotMatchesLineSeparators] let ns2c = try NSRegularExpression(pattern: #".*?<\/script>"#, options: options2c) _ = ns2c.firstMatch(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // GOOD let re3a = try Regex(#"(?is).*?<\/script[^>]*>"#) _ = try re3a.firstMatch(in: tainted) // GOOD let re3b = try Regex(#".*?<\/script[^>]*>"#).ignoresCase(true).dotMatchesNewlines(true) _ = try re3b.firstMatch(in: tainted) // GOOD let options3b: NSRegularExpression.Options = [.caseInsensitive, .dotMatchesLineSeparators] let ns3b = try NSRegularExpression(pattern: #".*?<\/script[^>]*>"#, options: options3b) _ = ns3b.firstMatch(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // GOOD - we don't care regexps that only match comments let re4 = try Regex(#""#).ignoresCase(true).dotMatchesNewlines(true) _ = try re4.firstMatch(in: tainted) // GOOD let re5 = try Regex(#")|([^\/\s>]+)[\S\s]*?>"#) _ = try re16.firstMatch(in: tainted) // BAD - doesn't match comments with the right capture groups let ns16 = try NSRegularExpression(pattern: #"<(?:!--([\S|\s]*?)-->)|([^\/\s>]+)[\S\s]*?>"#) _ = ns16.firstMatch(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // BAD - capture groups let re17 = try Regex(#"<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))"#) _ = try re17.firstMatch(in: 
tainted) // BAD - capture groups let ns17 = try NSRegularExpression(pattern: #"<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))"#, options: .caseInsensitive) _ = ns17.firstMatch(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // BAD - too strict matching on the end tag let ns2_1 = try NSRegularExpression(pattern: #"]*>([\s\S]*?)<\/script>"#, options: .caseInsensitive) _ = ns2_1.matches(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // BAD - capture groups let ns2_2 = try NSRegularExpression(pattern: #"(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|)"#, options: .caseInsensitive) _ = ns2_2.matches(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // BAD - capture groups let ns2_3 = try NSRegularExpression(pattern: #"<(?:(?:!--([\w\W]*?)-->)|(?:!\[CDATA\[([\w\W]*?)\]\]>)|(?:!DOCTYPE([\w\W]*?)>)|(?:\?([^\s\/<>]+) ?([\w\W]*?)[?/]>)|(?:\/([A-Za-z][A-Za-z0-9\-_\:\.]*)>)|(?:([A-Za-z][A-Za-z0-9\-_\:\.]*)((?:\s+[^"'>]+(?:(?:"[^"]*")|(?:'[^']*')|[^>]*))*|\/|\s+)>))"#) _ = ns2_3.matches(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // BAD - capture groups let ns2_4 = try NSRegularExpression(pattern: #"|<([^>]*?)>"#) _ = ns2_4.matches(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // GOOD - it's used with the ignorecase flag let ns2_5 = try NSRegularExpression(pattern: #"]*)>([\S\s]*?)<\/script([^>]*)>"#, options: .caseInsensitive) _ = ns2_5.matches(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // BAD - doesn't match --!> let ns2_6 = try NSRegularExpression(pattern: #"-->"#) _ = ns2_6.matches(in: tainted, range: NSMakeRange(0, tainted.utf16.count)) // GOOD let ns2_7 = try NSRegularExpression(pattern: #"^>|^->||--!>|