// codeql/swift/ql/test/library-tests/regex/redos_variants.swift


// --- stubs ---

struct URL { // stub: empty type so the tests below can declare and pass a `URL`
    init?(string: String) {} // failable initializer; `string` is ignored and the body is empty
}

struct AnyRegexOutput { // stub: empty type; `Regex.init(_:)` is only available when Output == AnyRegexOutput
}

protocol RegexComponent { // stub: protocol with no requirements; `Regex` declares conformance to it
}

struct Regex<Output>: RegexComponent {
    // Stub match result; it carries no data.
    struct Match {}

    init(_ pattern: String) throws where Output == AnyRegexOutput {}
    // Stub matchers: each one ignores `string` and always returns nil.
    func firstMatch(in string: String) throws -> Match? { nil }
    func prefixMatch(in string: String) throws -> Match? { nil }
    func wholeMatch(in string: String) throws -> Match? { nil }

    typealias RegexOutput = Output
}

extension String {
    init(contentsOf: URL) {
        let emptyContents: String = "" // stub: `contentsOf` is never read; the result is always empty
        self.init(emptyContents)
    }
}

// --- tests ---
//
// The focus of these tests is a range of vulnerable and non-vulnerable regexp strings.

func myRegexpVariantsTests(myUrl: URL) throws {
    let tainted = String(contentsOf: myUrl) // tainted: passed as the input to every match below
    // NOTE(review): the trailing "$ ..." comments look like CodeQL inline test expectations - keep each on the line of the call it annotates.
    // basic cases:
    // attack string: "a" x lots + "!"

    _ = try Regex(".*").firstMatch(in: tainted) // $ regex=.* input=tainted

    _ = try Regex("a*b").firstMatch(in: tainted) // $ regex=a*b input=tainted
    _ = try Regex("(a*)b").firstMatch(in: tainted) // $ regex=(a*)b input=tainted
    _ = try Regex("(a)*b").firstMatch(in: tainted) // $ regex=(a)*b input=tainted
    _ = try Regex("(a*)*b").firstMatch(in: tainted) // $ regex=(a*)*b input=tainted redos-vulnerable
    _ = try Regex("((a*)*b)").firstMatch(in: tainted) // $ regex=((a*)*b) input=tainted redos-vulnerable

    _ = try Regex("(a|aa?)b").firstMatch(in: tainted) // $ regex=(a|aa?)b input=tainted
    _ = try Regex("(a|aa?)*b").firstMatch(in: tainted) // $ regex=(a|aa?)*b input=tainted redos-vulnerable

    // from the qhelp:
    // attack string: "_" x lots + "!"

    _ = try Regex("^_(__|.)+_$").firstMatch(in: tainted) // $ regex=^_(__|.)+_$ input=tainted redos-vulnerable
    _ = try Regex("^_(__|[^_])+_$").firstMatch(in: tainted) // $ regex=^_(__|[^_])+_$ input=tainted

    // real world cases:

    // Adapted from marked (https://github.com/markedjs/marked), which is licensed
    // under the MIT license; see file licenses/marked-LICENSE.
    // GOOD
    _ = try Regex(#"^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)"#).firstMatch(in: tainted) // $ regex=^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*) SPURIOUS: redos-vulnerable
    // BAD
    // attack string: "_" + "__".repeat(100)
    _ = try Regex(#"^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)"#).wholeMatch(in: tainted) // $ redos-vulnerable regex=^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)

    // GOOD
    // Adapted from marked (https://github.com/markedjs/marked), which is licensed
    // under the MIT license; see file licenses/marked-LICENSE.
    _ = try Regex(#"^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)"#).firstMatch(in: tainted) // $ regex=^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)

    // GOOD - there is no witness in the end that could cause the regexp to not match
    // Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
    // which is licensed under the MIT license; see file licenses/brace-expansion-LICENSE.
    _ = try Regex("(.*,)+.+").firstMatch(in: tainted) // $ regex=(.*,)+.+

    // BAD
    // attack string: " '" + "\\\\".repeat(100)
    // Adapted from CodeMirror (https://github.com/codemirror/codemirror),
    // which is licensed under the MIT license; see file licenses/CodeMirror-LICENSE.
    _ = try Regex(#"^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?

    // GOOD
    // Adapted from jest (https://github.com/facebook/jest), which is licensed
    // under the MIT license; see file licenses/jest-LICENSE.
    _ = try Regex(#"^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*"#).firstMatch(in: tainted) // $ regex="^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*"

    // BAD
    // attack string: "/" + "\\/a".repeat(100)
    // Adapted from ANodeBlog (https://github.com/gefangshuai/ANodeBlog),
    // which is licensed under the Apache License 2.0; see file licenses/ANodeBlog-LICENSE.
    _ = try Regex(#"\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)"#).firstMatch(in: tainted) // $ redos-vulnerable regex="\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)"

    // BAD
    // attack string: "##".repeat(100) + "\na"
    // Adapted from CodeMirror (https://github.com/codemirror/codemirror),
    // which is licensed under the MIT license; see file licenses/CodeMirror-LICENSE.
    _ = try Regex(#"^([\s\[\{\(]|#.*)*$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^([\s\[\{\(]|#.*)*$

    // BAD
    // attack string: "a" + "[]".repeat(100) + ".b\n"
    // Adapted from Knockout (https://github.com/knockout/knockout), which is
    // licensed under the MIT license; see file licenses/knockout-LICENSE
    _ = try Regex(#"^[\_$a-z][\_$a-z0-9]*(\[.*?\])*(\.[\_$a-z][\_$a-z0-9]*(\[.*?\])*)*$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^[\_$a-z][\_$a-z0-9]*(\[.*?\])*(\.[\_$a-z][\_$a-z0-9]*(\[.*?\])*)*$

    // BAD
    // attack string: "[" + "][".repeat(100) + "]!"
    // Adapted from Prototype.js (https://github.com/prototypejs/prototype), which
    // is licensed under the MIT license; see file licenses/Prototype.js-LICENSE.
    _ = try Regex(#"(([\w#:.~>+()\s-]+|\*|\[.*?\])+)\s*(,|$)"#).firstMatch(in: tainted) // $ redos-vulnerable regex=(([\w#:.~>+()\s-]+|\*|\[.*?\])+)\s*(,|$)

    // BAD
    // attack string: "'" + "\\a".repeat(100) + '"'
    // Adapted from Prism (https://github.com/PrismJS/prism), which is licensed
    // under the MIT license; see file licenses/Prism-LICENSE.
    _ = try Regex(#"("|')(\\?.)*?\1"#).firstMatch(in: tainted) // $ redos-vulnerable regex=("|')(\\?.)*?\1

    // more cases:

    // GOOD
    _ = try Regex(#"(\r\n|\r|\n)+"#).firstMatch(in: tainted) // $ regex=(\r\n|\r|\n)+

    // GOOD
    _ = try Regex("(a|.)*").firstMatch(in: tainted) // $ regex=(a|.)*

    // BAD - testing the NFA
    // attack string: "a" x lots + "!"
    _ = try Regex("^([a-z]+)+$").firstMatch(in: tainted) // $ redos-vulnerable regex=^([a-z]+)+$
    _ = try Regex("^([a-z]*)*$").firstMatch(in: tainted) // $ redos-vulnerable regex=^([a-z]*)*$
    _ = try Regex(#"^([a-zA-Z0-9])(([\\.-]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^([a-zA-Z0-9])(([\\.-]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$
    _ = try Regex("^(([a-z])+.)+[A-Z]([a-z])+$").firstMatch(in: tainted) // $ redos-vulnerable regex=^(([a-z])+.)+[A-Z]([a-z])+$

    // BAD
    // attack string: "b" x lots + "!"
    _ = try Regex("(b|a?b)*c").firstMatch(in: tainted) // $ redos-vulnerable regex=(b|a?b)*c

    // GOOD
    _ = try Regex(#"(.|\n)*!"#).firstMatch(in: tainted) // $ regex=(.|\n)*!

    // BAD
    // attack string: "\n".repeat(100) + "."
    _ = try Regex(#"(?s)(.|\n)*!"#).firstMatch(in: tainted) // $ modes=DOTALL redos-vulnerable regex=(?s)(.|\n)*!

    // GOOD
    _ = try Regex(#"([\w.]+)*"#).firstMatch(in: tainted) // $ regex=([\w.]+)*
    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex(#"([\w.]+)*"#).wholeMatch(in: tainted) // $ regex=([\w.]+)* MISSING: redos-vulnerable

    // BAD
    // attack string: "b" x lots + "!"
    _ = try Regex(#"(([\s\S]|[^a])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=(([\s\S]|[^a])*)"

    // GOOD - there is no witness in the end that could cause the regexp to not match
    _ = try Regex(#"([^"']+)*"#).firstMatch(in: tainted) // $ regex=([^"']+)*

    // BAD
    // attack string: "b" x lots + "!"
    _ = try Regex(#"((.|[^a])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((.|[^a])*)"

    // GOOD
    _ = try Regex(#"((a|[^a])*)""#).firstMatch(in: tainted) // $ regex=((a|[^a])*)"

    // BAD
    // attack string: "b" x lots + "!"
    _ = try Regex(#"((b|[^a])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((b|[^a])*)"

    // BAD
    // attack string: "G" x lots + "!"
    _ = try Regex(#"((G|[^a])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((G|[^a])*)"

    // BAD
    // attack string: "0" x lots + "!"
    _ = try Regex(#"(([0-9]|[^a])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=(([0-9]|[^a])*)"

    // BAD [NOT DETECTED]
    // (no confirmed attack string)
    _ = try Regex(#"(?:=(?:([!#\$%&'\*\+\-\.\^_`\|~0-9A-Za-z]+)|"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"))?"#).firstMatch(in: tainted) // $ regex=(?:=(?:([!#\$%&'\*\+\-\.\^_`\|~0-9A-Za-z]+)|"((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)"))? MISSING: redos-vulnerable

    // BAD [NOT DETECTED]
    // (no confirmed attack string)
    _ = try Regex(#""((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)""#).firstMatch(in: tainted) // $ regex="((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"])*)" MISSING: redos-vulnerable

    // GOOD
    _ = try Regex(#""((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"\\])*)""#).firstMatch(in: tainted) // $ regex="((?:\\[\x00-\x7f]|[^\x00-\x08\x0a-\x1f\x7f"\\])*)"

    // BAD
    // attack string: "d" x lots + "!"
    _ = try Regex(#"(([a-z]|[d-h])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=(([a-z]|[d-h])*)"

    // BAD
    // attack string: "_" x lots
    _ = try Regex(#"(([^a-z]|[^0-9])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=(([^a-z]|[^0-9])*)"

    // BAD
    // attack string: "0" x lots + "!"
    _ = try Regex(#"((\d|[0-9])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\d|[0-9])*)"

    // BAD
    // attack string: "\n" x lots + "."
    _ = try Regex(#"((\s|\s)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\s|\s)*)"

    // BAD
    // attack string: "G" x lots + "!"
    _ = try Regex(#"((\w|G)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\w|G)*)"

    // GOOD
    _ = try Regex(#"((\s|\d)*)""#).firstMatch(in: tainted) // $ regex=((\s|\d)*)"

    // BAD
    // attack string: "5" x lots + "!"
    _ = try Regex(#"((\d|\d)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\d|\d)*)"

    // BAD
    // attack string: "0" x lots + "!"
    _ = try Regex(#"((\d|\w)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\d|\w)*)"

    // BAD
    // attack string: "5" x lots + "!"
    _ = try Regex(#"((\d|5)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\d|5)*)"

    // BAD
    // attack string: "\u{000C}" x lots + "!"
    _ = try Regex(#"((\s|[\f])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\s|[\f])*)"

    // BAD
    // attack string: "\n" x lots + "."
    _ = try Regex(#"((\s|[\v]|\\v)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\s|[\v]|\\v)*)"

    // BAD
    // attack string: "\u{000C}" x lots + "!"
    _ = try Regex(#"((\f|[\f])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\f|[\f])*)"

    // BAD
    // attack string: "\n" x lots + "."
    _ = try Regex(#"((\W|\D)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\W|\D)*)"

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex(#"((\S|\w)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\S|\w)*)"

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex(#"((\S|[\w])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((\S|[\w])*)"

    // BAD
    // attack string: "1s" x lots + "!"
    _ = try Regex(#"((1s|[\da-z])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((1s|[\da-z])*)"

    // BAD
    // attack string: "0" x lots + "!"
    _ = try Regex(#"((0|[\d])*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=((0|[\d])*)"

    // BAD
    // attack string: "0" x lots + "!"
    _ = try Regex(#"(([\d]+)*)""#).firstMatch(in: tainted) // $ redos-vulnerable regex=(([\d]+)*)"

    // GOOD - there is no witness in the end that could cause the regexp to not match
    _ = try Regex(#"(\d+(X\d+)?)+"#).firstMatch(in: tainted) // $ regex=(\d+(X\d+)?)+
    // BAD
    // attack string: "0" x lots + "!"
    _ = try Regex(#"(\d+(X\d+)?)+"#).wholeMatch(in: tainted) // $ regex=(\d+(X\d+)?)+ MISSING: redos-vulnerable

    // GOOD - there is no witness in the end that could cause the regexp to not match
    _ = try Regex("([0-9]+(X[0-9]*)?)*").firstMatch(in: tainted) // $ regex=([0-9]+(X[0-9]*)?)*
    // BAD
    // attack string: "0" x lots + "!"
    _ = try Regex("([0-9]+(X[0-9]*)?)*").wholeMatch(in: tainted) // $ regex=([0-9]+(X[0-9]*)?)* MISSING: redos-vulnerable

    // GOOD
    _ = try Regex("^([^>]+)*(>|$)").firstMatch(in: tainted) // $ regex=^([^>]+)*(>|$)

    // BAD
    // attack string: "##".repeat(100) + "\na"
    _ = try Regex("^([^>a]+)*(>|$)").firstMatch(in: tainted) // $ redos-vulnerable regex=^([^>a]+)*(>|$)

    // BAD
    // attack string: "\n" x lots + "."
    _ = try Regex(#"(\n\s*)+$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=(\n\s*)+$

    // BAD
    // attack string: "\n" x lots + "."
    _ = try Regex(#"^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|\{\d+(?:,\d*)?})"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|\{\d+(?:,\d*)?})

    // BAD
    // (no confirmed attack string)
    _ = try Regex(#"\{\[\s*([a-zA-Z]+)\(([a-zA-Z]+)\)((\s*([a-zA-Z]+)\: ?([ a-zA-Z{}]+),?)+)*\s*\]\}"#).firstMatch(in: tainted) // $ redos-vulnerable regex="\{\[\s*([a-zA-Z]+)\(([a-zA-Z]+)\)((\s*([a-zA-Z]+)\: ?([ a-zA-Z{}]+),?)+)*\s*\]\}"

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex("(a+|b+|c+)*c").firstMatch(in: tainted) // $ redos-vulnerable regex=(a+|b+|c+)*c

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex("(((a+a?)*)+b+)").firstMatch(in: tainted) // $ redos-vulnerable regex=(((a+a?)*)+b+)

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex("(a+)+bbbb").firstMatch(in: tainted) // $ redos-vulnerable regex=(a+)+bbbb

    // GOOD
    _ = try Regex("(a+)+aaaaa*a+").firstMatch(in: tainted) // $ regex=(a+)+aaaaa*a+
    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex("(a+)+aaaaa*a+").wholeMatch(in: tainted) // $ regex=(a+)+aaaaa*a+ MISSING: redos-vulnerable

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex("(a+)+aaaaa$").firstMatch(in: tainted) // $ redos-vulnerable regex=(a+)+aaaaa$

    // GOOD
    _ = try Regex(#"(\n+)+\n\n"#).firstMatch(in: tainted) // $ regex=(\n+)+\n\n
    // BAD
    // attack string: "\n" x lots + "."
    _ = try Regex(#"(\n+)+\n\n"#).wholeMatch(in: tainted) // $ regex=(\n+)+\n\n MISSING: redos-vulnerable

    // BAD
    // attack string: "\n" x lots + "."
    _ = try Regex(#"(\n+)+\n\n$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=(\n+)+\n\n$

    // BAD
    // attack string: " " x lots + "X"
    _ = try Regex("([^X]+)*$").firstMatch(in: tainted) // $ redos-vulnerable regex=([^X]+)*$

    // BAD
    // attack string: "b" x lots + "!"
    _ = try Regex("(([^X]b)+)*$").firstMatch(in: tainted) // $ redos-vulnerable regex=(([^X]b)+)*$

    // GOOD
    _ = try Regex("(([^X]b)+)*($|[^X]b)").firstMatch(in: tainted) // $ regex=(([^X]b)+)*($|[^X]b)
    // BAD
    // attack string: "b" x lots + "!"
    _ = try Regex("(([^X]b)+)*($|[^X]b)").wholeMatch(in: tainted) // $ regex=(([^X]b)+)*($|[^X]b) MISSING: redos-vulnerable

    // BAD
    // attack string: "b" x lots + "!"
    _ = try Regex("(([^X]b)+)*($|[^X]c)").firstMatch(in: tainted) // $ redos-vulnerable regex=(([^X]b)+)*($|[^X]c)

    // GOOD
    _ = try Regex("((ab)+)*ababab").firstMatch(in: tainted) // $ regex=((ab)+)*ababab
    // BAD
    // attack string: "ab" x lots + "!"
    _ = try Regex("((ab)+)*ababab").wholeMatch(in: tainted) // $ regex=((ab)+)*ababab MISSING: redos-vulnerable

    // GOOD
    _ = try Regex("((ab)+)*abab(ab)*(ab)+").firstMatch(in: tainted) // $ regex=((ab)+)*abab(ab)*(ab)+
    // BAD
    // attack string: "ab" x lots + "!"
    _ = try Regex("((ab)+)*abab(ab)*(ab)+").wholeMatch(in: tainted) // $ regex=((ab)+)*abab(ab)*(ab)+ MISSING: redos-vulnerable

    // GOOD
    _ = try Regex("((ab)+)*").firstMatch(in: tainted) // $ regex=((ab)+)*
    // BAD
    // attack string: "ab" x lots + "!"
    _ = try Regex("((ab)+)*").wholeMatch(in: tainted) // $ regex=((ab)+)* MISSING: redos-vulnerable

    // BAD
    // attack string: "ab" x lots + "!"
    _ = try Regex("((ab)+)*$").firstMatch(in: tainted) // $ redos-vulnerable regex=((ab)+)*$

    // GOOD
    _ = try Regex("((ab)+)*[a1][b1][a2][b2][a3][b3]").firstMatch(in: tainted) // $ regex=((ab)+)*[a1][b1][a2][b2][a3][b3]
    // BAD
    // attack string: "ab" x lots + "!"
    _ = try Regex("((ab)+)*[a1][b1][a2][b2][a3][b3]").wholeMatch(in: tainted) // $ regex=((ab)+)*[a1][b1][a2][b2][a3][b3] MISSING: redos-vulnerable

    // BAD
    // (no confirmed attack string)
    _ = try Regex(#"([\n\s]+)*(.)"#).firstMatch(in: tainted) // $ redos-vulnerable regex=([\n\s]+)*(.)

    // GOOD - any witness passes through the accept state.
    _ = try Regex("(A*A*X)*").firstMatch(in: tainted) // $ regex=(A*A*X)*

    // GOOD
    _ = try Regex(#"([^\\\]]+)*"#).firstMatch(in: tainted) // $ regex=([^\\\]]+)*

    // BAD
    _ = try Regex(#"(\w*foobarbaz\w*foobarbaz\w*foobarbaz\w*foobarbaz\s*foobarbaz\d*foobarbaz\w*)+-"#).firstMatch(in: tainted) // $ redos-vulnerable regex=(\w*foobarbaz\w*foobarbaz\w*foobarbaz\w*foobarbaz\s*foobarbaz\d*foobarbaz\w*)+-

    // GOOD
    // (these regexes explore a query performance issue we had at one point)
    _ = try Regex(#"(\w*foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar)+"#).firstMatch(in: tainted) // $ regex=(\w*foobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar)+
    _ = try Regex(#"(\w*foobarfoobarfoobar)+"#).firstMatch(in: tainted) // $ regex=(\w*foobarfoobarfoobar)+

    // BAD (but cannot currently construct a prefix)
    // attack string: "aa" + "b" x lots + "!"
    _ = try Regex("a{2,3}(b+)+X").firstMatch(in: tainted) // $ redos-vulnerable regex=a{2,3}(b+)+X

    // BAD (and a good prefix test)
    // (no confirmed attack string)
    _ = try Regex(#"^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^<(\w+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>

    // GOOD
    _ = try Regex(#"(a+)*[\s\S][\s\S][\s\S]?"#).firstMatch(in: tainted) // $ regex=(a+)*[\s\S][\s\S][\s\S]?

    // GOOD - but we fail to see that repeating the attack string ends in the "accept any" state (due to not parsing the range `[\s\S]{2,3}`).
    _ = try Regex(#"(a+)*[\s\S]{2,3}"#).firstMatch(in: tainted) // $ regex=(a+)*[\s\S]{2,3} SPURIOUS: redos-vulnerable

    // GOOD - but we spuriously conclude that a rejecting suffix exists (due to not parsing the range `[\s\S]{2,}` when constructing the NFA).
    _ = try Regex(#"(a+)*([\s\S]{2,}|X)$"#).firstMatch(in: tainted) // $ regex=(a+)*([\s\S]{2,}|X)$ SPURIOUS: redos-vulnerable

    // GOOD
    _ = try Regex(#"(a+)*([\s\S]*|X)$"#).firstMatch(in: tainted) // $ regex=(a+)*([\s\S]*|X)$

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex(#"((a+)*$|[\s\S]+)"#).firstMatch(in: tainted) // $ redos-vulnerable regex=((a+)*$|[\s\S]+)

    // GOOD - but still flagged. The only change compared to the above is the order of alternatives, which we don't model.
    _ = try Regex(#"([\s\S]+|(a+)*$)"#).firstMatch(in: tainted) // $ regex=([\s\S]+|(a+)*$) SPURIOUS: redos-vulnerable

    // GOOD
    _ = try Regex("((;|^)a+)+$").firstMatch(in: tainted) // $ regex=((;|^)a+)+$

    // BAD (a good prefix test)
    // attack string: "00000000000000" + "e" x lots + "!"
    _ = try Regex("(^|;)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(e+)+f").firstMatch(in: tainted) // $ redos-vulnerable regex=(^|;)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(e+)+f

    // BAD
    // attack string: "ab" + "c" x lots + "!"
     _ = try Regex("^ab(c+)+$").firstMatch(in: tainted) // $ redos-vulnerable regex=^ab(c+)+$

    // BAD
    // (no confirmed attack string)
    _ = try Regex(#"(\d(\s+)*){20}"#).firstMatch(in: tainted) // $ redos-vulnerable regex=(\d(\s+)*){20}

    // GOOD - but we spuriously conclude that a rejecting suffix exists.
    _ = try Regex(#"(([^/]|X)+)(\/[\s\S]*)*$"#).firstMatch(in: tainted) // $ regex=(([^/]|X)+)(\/[\s\S]*)*$ SPURIOUS: redos-vulnerable

    // GOOD - but we spuriously conclude that a rejecting suffix exists.
    _ = try Regex("^((x([^Y]+)?)*(Y|$))").firstMatch(in: tainted) // $ regex=^((x([^Y]+)?)*(Y|$)) SPURIOUS: redos-vulnerable

    // BAD
    // (no confirmed attack string)
    _ = try Regex(#"foo([\w-]*)+bar"#).firstMatch(in: tainted) // $ redos-vulnerable regex=foo([\w-]*)+bar

    // BAD
    // attack string: "ab" x lots + "!"
    _ = try Regex("((ab)*)+c").firstMatch(in: tainted) // $ redos-vulnerable regex=((ab)*)+c

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex("(a?a?)*b").firstMatch(in: tainted) // $ redos-vulnerable regex=(a?a?)*b

    // GOOD
    _ = try Regex("(a?)*b").firstMatch(in: tainted) // $ regex=(a?)*b

    // BAD - but not detected
    // (no confirmed attack string)
    _ = try Regex("(c?a?)*b").firstMatch(in: tainted) // $ regex=(c?a?)*b MISSING: redos-vulnerable

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex("(?:a|a?)+b").firstMatch(in: tainted) // $ redos-vulnerable regex=(?:a|a?)+b

    // BAD - but not detected.
    // attack string: "ab" x lots + "!"
    _ = try Regex("(a?b?)*$").firstMatch(in: tainted) // $ regex=(a?b?)*$ MISSING: redos-vulnerable

    // BAD
    // (no confirmed attack string)
    _ = try Regex("PRE(([a-c]|[c-d])T(e?e?e?e?|X))+(cTcT|cTXcTX$)").firstMatch(in: tainted) // $ redos-vulnerable regex=PRE(([a-c]|[c-d])T(e?e?e?e?|X))+(cTcT|cTXcTX$)

    // BAD
    // attack string: "a" x lots + "!"
    _ = try Regex(#"^((a)+\w)+$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^((a)+\w)+$

    // BAD
    // attack string: "bbbbbbbbbb." x lots + "!"
    _ = try Regex("^(b+.)+$").firstMatch(in: tainted) // $ redos-vulnerable regex=^(b+.)+$

    // BAD - all 4 bad combinations of nested * and +
    // attack string: "a" x lots + "!"
    _ = try Regex("(a*)*b").firstMatch(in: tainted) // $ redos-vulnerable regex=(a*)*b
    _ = try Regex("(a+)*b").firstMatch(in: tainted) // $ redos-vulnerable regex=(a+)*b
    _ = try Regex("(a*)+b").firstMatch(in: tainted) // $ redos-vulnerable regex=(a*)+b
    _ = try Regex("(a+)+b").firstMatch(in: tainted) // $ redos-vulnerable regex=(a+)+b

    // GOOD
    _ = try Regex("(a|b)+").firstMatch(in: tainted) // $ regex=(a|b)+

    // GOOD
    _ = try Regex(#"(?:[\s;,"'<>(){}|\[\]@=+*]|:(?![/\\]))+"#).firstMatch(in: tainted) // $ regex=(?:[\s;,"'<>(){}|\[\]@=+*]|:(?![/\\]))+

    // BAD?
    // (no confirmed attack string)
    _ = try Regex(#"^((?:a{|-)|\w\{)+X$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^((?:a{|-)|\w\{)+X$
    _ = try Regex(#"^((?:a{0|-)|\w\{\d)+X$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^((?:a{0|-)|\w\{\d)+X$
    _ = try Regex(#"^((?:a{0,|-)|\w\{\d,)+X$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^((?:a{0,|-)|\w\{\d,)+X$
    _ = try Regex(#"^((?:a{0,2|-)|\w\{\d,\d)+X$"#).firstMatch(in: tainted) // $ redos-vulnerable regex=^((?:a{0,2|-)|\w\{\d,\d)+X$

    // GOOD
    _ = try Regex(#"^((?:a{0,2}|-)|\w\{\d,\d\})+X$"#).firstMatch(in: tainted) // $ regex=^((?:a{0,2}|-)|\w\{\d,\d\})+X$

    // BAD
    // attack string: "X" + "a" x lots
    _ = try Regex(#"X(\u0061|a)*Y"#).firstMatch(in: tainted) // $ redos-vulnerable regex=X(\u0061|a)*Y

    // GOOD
    _ = try Regex(#"X(\u0061|b)+Y"#).firstMatch(in: tainted) // $ regex=X(\u0061|b)+Y

    // BAD
    // attack string: "X" + "a" x lots
    _ = try Regex(#"X(\U00000061|a)*Y"#).firstMatch(in: tainted) // $ redos-vulnerable regex=X(\U00000061|a)*Y

    // GOOD
    _ = try Regex(#"X(\U00000061|b)+Y"#).firstMatch(in: tainted) // $ regex=X(\U00000061|b)+Y

    // BAD
    // attack string: "X" + "a" x lots
    _ = try Regex(#"X(\x61|a)*Y"#).firstMatch(in: tainted) // $ redos-vulnerable regex=X(\x61|a)*Y

    // GOOD
    _ = try Regex(#"X(\x61|b)+Y"#).firstMatch(in: tainted) // $ regex=X(\x61|b)+Y

    // BAD
    // attack string: "X" + "a" x lots
    _ = try Regex(#"X(\x{061}|a)*Y"#).firstMatch(in: tainted) // $ redos-vulnerable regex=X(\x{061}|a)*Y

    // GOOD
    _ = try Regex(#"X(\x{061}|b)+Y"#).firstMatch(in: tainted) // $ regex=X(\x{061}|b)+Y

    // BAD
    // attack string: "X" + "7" x lots
    _ = try Regex(#"X(\p{Digit}|7)*Y"#).firstMatch(in: tainted) // $ redos-vulnerable regex=X(\p{Digit}|7)*Y

    // GOOD
    _ = try Regex(#"X(\p{Digit}|b)+Y"#).firstMatch(in: tainted) // $ regex=X(\p{Digit}|b)+Y

    // BAD
    // attack string: "X" + "b" x lots
    _ = try Regex(#"X(\P{Digit}|b)*Y"#).firstMatch(in: tainted) // $ redos-vulnerable regex=X(\P{Digit}|b)*Y

    // GOOD
    _ = try Regex(#"X(\P{Digit}|7)+Y"#).firstMatch(in: tainted) // $ regex=X(\P{Digit}|7)+Y

    // BAD
    // attack string: "X" + "7" x lots
    _ = try Regex(#"X(\p{IsDigit}|7)*Y"#).firstMatch(in: tainted) // $ redos-vulnerable regex=X(\p{IsDigit}|7)*Y

    // GOOD
    _ = try Regex(#"X(\p{IsDigit}|b)+Y"#).firstMatch(in: tainted) // $ regex=X(\p{IsDigit}|b)+Y

    // BAD - but not detected
    // attack string: "X" + "a" x lots
    _ = try Regex(#"X(\p{Alpha}|a)*Y"#).firstMatch(in: tainted) // $ regex=X(\p{Alpha}|a)*Y MISSING: redos-vulnerable

    // GOOD
    _ = try Regex(#"X(\p{Alpha}|7)+Y"#).firstMatch(in: tainted) // $ regex=X(\p{Alpha}|7)+Y

    // GOOD
    _ = try Regex(#"("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)"#).firstMatch(in: tainted) // $ regex=("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)
    // BAD
    // attack string: "##" x lots + "\na"
    _ = try Regex(#"("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)"#).wholeMatch(in: tainted) // $ regex=("[^"]*?"|[^"\s]+)+(?=\s*|\s*$) MISSING: redos-vulnerable

    // BAD
    // attack string: "/" + "\\/a" x lots
    _ = try Regex(#"/("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)X"#).firstMatch(in: tainted) // $ redos-vulnerable regex=/("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)X
    _ = try Regex(#"/("[^"]*?"|[^"\s]+)+(?=X)"#).firstMatch(in: tainted) // $ redos-vulnerable regex=/("[^"]*?"|[^"\s]+)+(?=X)

    // BAD
    // attack string: "0" x lots + "!"
    _ = try Regex(#"\A(\d|0)*x"#).firstMatch(in: tainted) // $ redos-vulnerable regex=\A(\d|0)*x
    _ = try Regex(#"(\d|0)*\Z"#).firstMatch(in: tainted) // $ redos-vulnerable regex=(\d|0)*\Z
    _ = try Regex(#"\b(\d|0)*x"#).firstMatch(in: tainted) // $ redos-vulnerable regex=\b(\d|0)*x

    // GOOD - possessive quantifiers don't backtrack
    _ = try Regex("(a*+)*+b").firstMatch(in: tainted) // $ hasParseFailure regex=(a*+)*+b
    _ = try Regex("(a*)*+b").firstMatch(in: tainted) // $ hasParseFailure regex=(a*)*+b
    _ = try Regex("(a*+)*b").firstMatch(in: tainted) // $ hasParseFailure regex=(a*+)*b

    // BAD - but not detected due to the way possessive quantifiers are approximated
    // attack string: "aab" x lots + "!"
    _ = try Regex("((aa|a*+)b)*c").firstMatch(in: tainted) // $ hasParseFailure regex=((aa|a*+)b)*c MISSING: redos-vulnerable
}