Files
codeql/java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java
2023-09-13 17:58:31 +01:00

446 lines
14 KiB
Java

import java.util.regex.Pattern;
class ExpRedosTest {
static String[] regs = {
// NOT GOOD; attack: "_" + "__".repeat(100)
// Adapted from marked (https://github.com/markedjs/marked), which is licensed
// under the MIT license; see file marked-LICENSE.
"^\\b_((?:__|[\\s\\S])+?)_\\b|^\\*((?:\\*\\*|[\\s\\S])+?)\\*(?!\\*)", // $ hasExpRedos
// GOOD
// Adapted from marked (https://github.com/markedjs/marked), which is licensed
// under the MIT license; see file marked-LICENSE.
"^\\b_((?:__|[^_])+?)_\\b|^\\*((?:\\*\\*|[^*])+?)\\*(?!\\*)",
// GOOD - there is no witness in the end that could cause the regexp to not match
// Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
// which is licensed under the MIT license; see file brace-expansion-LICENSE.
"(.*,)+.+",
// NOT GOOD; attack: " '" + "\\\\".repeat(100)
// Adapted from CodeMirror (https://github.com/codemirror/codemirror),
// which is licensed under the MIT license; see file CodeMirror-LICENSE.
"^(?:\\s+(?:\"(?:[^\"\\\\]|\\\\\\\\|\\\\.)+\"|'(?:[^'\\\\]|\\\\\\\\|\\\\.)+'|\\((?:[^)\\\\]|\\\\\\\\|\\\\.)+\\)))?", // $ hasExpRedos
// GOOD
// Adapted from lulucms2 (https://github.com/yiifans/lulucms2).
"\\(\\*(?:[\\s\\S]*?\\(\\*[\\s\\S]*?\\*\\))*[\\s\\S]*?\\*\\)",
// GOOD
// Adapted from jest (https://github.com/facebook/jest), which is licensed
// under the MIT license; see file jest-LICENSE.
"^ *(\\S.*\\|.*)\\n *([-:]+ *\\|[-| :]*)\\n((?:.*\\|.*(?:\\n|$))*)\\n*",
// NOT GOOD, variant of good3; attack: "a|\n:|\n" + "||\n".repeat(100)
"^ *(\\S.*\\|.*)\\n *([-:]+ *\\|[-| :]*)\\n((?:.*\\|.*(?:\\n|$))*)a", // $ hasExpRedos
// NOT GOOD; attack: "/" + "\\/a".repeat(100)
// Adapted from ANodeBlog (https://github.com/gefangshuai/ANodeBlog),
// which is licensed under the Apache License 2.0; see file ANodeBlog-LICENSE.
"\\/(?![ *])(\\\\\\/|.)*?\\/[gim]*(?=\\W|$)", // $ hasExpRedos
// NOT GOOD; attack: "##".repeat(100) + "\na"
// Adapted from CodeMirror (https://github.com/codemirror/codemirror),
// which is licensed under the MIT license; see file CodeMirror-LICENSE.
"^([\\s\\[\\{\\(]|#.*)*$", // $ hasExpRedos
// GOOD
"(\\r\\n|\\r|\\n)+",
// BAD - PoC: `node -e "/((?:[^\"\']|\".*?\"|\'.*?\')*?)([(,)]|$)/.test(\"'''''''''''''''''''''''''''''''''''''''''''''\\\"\");"`. It's complicated though, because the regexp still matches something, it just matches the empty-string after the attack string.
// NOT GOOD; attack: "a" + "[]".repeat(100) + ".b\n"
// Adapted from Knockout (https://github.com/knockout/knockout), which is
// licensed under the MIT license; see file knockout-LICENSE
"^[\\_$a-z][\\_$a-z0-9]*(\\[.*?\\])*(\\.[\\_$a-z][\\_$a-z0-9]*(\\[.*?\\])*)*$", // $ hasExpRedos
// GOOD
"(a|.)*",
// Testing the NFA - only some of the below are detected.
"^([a-z]+)+$", // $ hasExpRedos
"^([a-z]*)*$", // $ hasExpRedos
"^([a-zA-Z0-9])(([\\\\-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$", // $ hasExpRedos
"^(([a-z])+.)+[A-Z]([a-z])+$", // $ hasExpRedos
// NOT GOOD; attack: "[" + "][".repeat(100) + "]!"
// Adapted from Prototype.js (https://github.com/prototypejs/prototype), which
// is licensed under the MIT license; see file Prototype.js-LICENSE.
"(([\\w#:.~>+()\\s-]+|\\*|\\[.*?\\])+)\\s*(,|$)", // $ hasExpRedos
// NOT GOOD; attack: "'" + "\\a".repeat(100) + '"'
// Adapted from Prism (https://github.com/PrismJS/prism), which is licensed
// under the MIT license; see file Prism-LICENSE.
"(\"|')(\\\\?.)*?\\1", // $ hasExpRedos
// NOT GOOD
"(b|a?b)*c", // $ hasExpRedos
// NOT GOOD
"(a|aa?)*b", // $ hasExpRedos
// GOOD
"(.|\\n)*!",
// NOT GOOD; attack: "\n".repeat(100) + "."
"(?s)(.|\\n)*!", // $ hasExpRedos
// NOT GOOD; attack: "\n".repeat(100) + "."
"(?is)(.|\\n)*!", // $ hasExpRedos
// GOOD
"([\\w.]+)*",
// NOT GOOD
"(a|aa?)*b", // $ hasExpRedos
// NOT GOOD
"(([\\s\\S]|[^a])*)\"", // $ hasExpRedos
// GOOD - there is no witness in the end that could cause the regexp to not match
"([^\"']+)*",
// NOT GOOD
"((.|[^a])*)\"", // $ hasExpRedos
// GOOD
"((a|[^a])*)\"",
// NOT GOOD
"((b|[^a])*)\"", // $ hasExpRedos
// NOT GOOD
"((G|[^a])*)\"", // $ hasExpRedos
// NOT GOOD
"(([0-9]|[^a])*)\"", // $ hasExpRedos
// NOT GOOD
"(?:=(?:([!#\\$%&'\\*\\+\\-\\.\\^_`\\|~0-9A-Za-z]+)|\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"))?", // $ MISSING: hasExpRedos
// NOT GOOD
"\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"", // $ MISSING: hasExpRedos
// GOOD
"\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"",
// NOT GOOD
"(([a-z]|[d-h])*)\"", // $ hasExpRedos
// NOT GOOD
"(([^a-z]|[^0-9])*)\"", // $ hasExpRedos
// NOT GOOD
"((\\d|[0-9])*)\"", // $ hasExpRedos
// NOT GOOD
"((\\s|\\s)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\w|G)*)\"", // $ hasExpRedos
// GOOD
"((\\s|\\d)*)\"",
// NOT GOOD
"((\\d|\\w)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\d|5)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\s|[\\f])*)\"", // $ hasExpRedos
// NOT GOOD - but not detected (likely because \v is a character class in Java rather than a specific character in other langs)
"((\\s|[\\v]|\\\\v)*)\"", // $ MISSING: hasExpRedos
// NOT GOOD
"((\\f|[\\f])*)\"", // $ hasExpRedos
// NOT GOOD
"((\\W|\\D)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\S|\\w)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\S|[\\w])*)\"", // $ hasExpRedos
// NOT GOOD
"((1s|[\\da-z])*)\"", // $ hasExpRedos
// NOT GOOD
"((0|[\\d])*)\"", // $ hasExpRedos
// NOT GOOD
"(([\\d]+)*)\"", // $ hasExpRedos
// GOOD - there is no witness in the end that could cause the regexp to not match
"(\\d+(X\\d+)?)+",
// GOOD - there is no witness in the end that could cause the regexp to not match
"([0-9]+(X[0-9]*)?)*",
// GOOD
"^([^>]+)*(>|$)",
// NOT GOOD
"^([^>a]+)*(>|$)", // $ hasExpRedos
// NOT GOOD
"(\\n\\s*)+$", // $ hasExpRedos
// NOT GOOD
"^(?:\\s+|#.*|\\(\\?#[^)]*\\))*(?:[?*+]|\\{\\d+(?:,\\d*)?})", // $ hasExpRedos
// NOT GOOD
"\\{\\[\\s*([a-zA-Z]+)\\(([a-zA-Z]+)\\)((\\s*([a-zA-Z]+)\\: ?([ a-zA-Z{}]+),?)+)*\\s*\\]\\}", // $ hasExpRedos
// NOT GOOD
"(a+|b+|c+)*c", // $ hasExpRedos
// NOT GOOD
"(((a+a?)*)+b+)", // $ hasExpRedos
// NOT GOOD
"(a+)+bbbb", // $ hasExpRedos
// GOOD
"(a+)+aaaaa*a+",
// NOT GOOD
"(a+)+aaaaa$", // $ hasExpRedos
// GOOD
"(\\n+)+\\n\\n",
// NOT GOOD
"(\\n+)+\\n\\n$", // $ hasExpRedos
// NOT GOOD
"([^X]+)*$", // $ hasExpRedos
// NOT GOOD
"(([^X]b)+)*$", // $ hasExpRedos
// GOOD
"(([^X]b)+)*($|[^X]b)",
// NOT GOOD
"(([^X]b)+)*($|[^X]c)", // $ hasExpRedos
// GOOD
"((ab)+)*ababab",
// GOOD
"((ab)+)*abab(ab)*(ab)+",
// GOOD
"((ab)+)*",
// NOT GOOD
"((ab)+)*$", // $ hasExpRedos
// GOOD
"((ab)+)*[a1][b1][a2][b2][a3][b3]",
// NOT GOOD
"([\\n\\s]+)*(.)", // $ hasExpRedos
// GOOD - any witness passes through the accept state.
"(A*A*X)*",
// GOOD
"([^\\\\\\]]+)*",
// NOT GOOD
"(\\w*foobarbaz\\w*foobarbaz\\w*foobarbaz\\w*foobarbaz\\s*foobarbaz\\d*foobarbaz\\w*)+-", // $ hasExpRedos
// NOT GOOD
"(.thisisagoddamnlongstringforstresstestingthequery|\\sthisisagoddamnlongstringforstresstestingthequery)*-", // $ hasExpRedos
// NOT GOOD
"(thisisagoddamnlongstringforstresstestingthequery|this\\w+query)*-", // $ hasExpRedos
// GOOD
"(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-",
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
"foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo", // $ SPURIOUS: hasExpRedos
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
"foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo", // $ SPURIOUS: hasExpRedos
// NOT GOOD (but cannot currently construct a prefix)
"a{2,3}(b+)+X", // $ hasExpRedos
// NOT GOOD (and a good prefix test)
"^<(\\w+)((?:\\s+\\w+(?:\\s*=\\s*(?:(?:\"[^\"]*\")|(?:'[^']*')|[^>\\s]+))?)*)\\s*(\\/?)>", // $ hasExpRedos
// GOOD
"(a+)*[\\s\\S][\\s\\S][\\s\\S]?",
// GOOD - but we fail to see that repeating the attack string ends in the "accept any" state (due to not parsing the range `[\s\S]{2,3}`).
"(a+)*[\\s\\S]{2,3}", // $ SPURIOUS: hasExpRedos
// GOOD - but we spuriously conclude that a rejecting suffix exists (due to not parsing the range `[\s\S]{2,}` when constructing the NFA).
"(a+)*([\\s\\S]{2,}|X)$", // $ SPURIOUS: hasExpRedos
// GOOD
"(a+)*([\\s\\S]*|X)$",
// NOT GOOD
"((a+)*$|[\\s\\S]+)", // $ hasExpRedos
// GOOD - but still flagged. The only change compared to the above is the order of alternatives, which we don't model.
"([\\s\\S]+|(a+)*$)", // $ SPURIOUS: hasExpRedos
// GOOD
"((;|^)a+)+$",
// NOT GOOD (a good prefix test)
"(^|;)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(e+)+f", // $ hasExpRedos
// NOT GOOD
"^ab(c+)+$", // $ hasExpRedos
// NOT GOOD
"(\\d(\\s+)*){20}", // $ hasExpRedos
// GOOD - but we spuriously conclude that a rejecting suffix exists.
"(([^/]|X)+)(\\/[\\s\\S]*)*$", // $ SPURIOUS: hasExpRedos
// GOOD - but we spuriously conclude that a rejecting suffix exists.
"^((x([^Y]+)?)*(Y|$))", // $ SPURIOUS: hasExpRedos
// NOT GOOD
"(a*)+b", // $ hasExpRedos
// NOT GOOD
"foo([\\w-]*)+bar", // $ hasExpRedos
// NOT GOOD
"((ab)*)+c", // $ hasExpRedos
// NOT GOOD
"(a?a?)*b", // $ hasExpRedos
// GOOD
"(a?)*b",
// NOT GOOD - but not detected
"(c?a?)*b", // $ MISSING: hasExpRedos
// NOT GOOD
"(?:a|a?)+b", // $ hasExpRedos
// NOT GOOD - but not detected.
"(a?b?)*$", // $ MISSING: hasExpRedos
// NOT GOOD
"PRE(([a-c]|[c-d])T(e?e?e?e?|X))+(cTcT|cTXcTX$)", // $ hasExpRedos
// NOT GOOD
"^((a)+\\w)+$", // $ hasExpRedos
// NOT GOOD
"^(b+.)+$", // $ hasExpRedos
// GOOD
"a*b",
// All 4 bad combinations of nested * and +
"(a*)*b", // $ hasExpRedos
"(a+)*b", // $ hasExpRedos
"(a*)+b", // $ hasExpRedos
"(a+)+b", // $ hasExpRedos
// GOOD
"(a|b)+",
"(?:[\\s;,\"'<>(){}|\\[\\]@=+*]|:(?![/\\\\]))+",
"^((?:a{|-)|\\w\\{)+X$", // $ hasParseFailure
"^((?:a{0|-)|\\w\\{\\d)+X$", // $ hasParseFailure
"^((?:a{0,|-)|\\w\\{\\d,)+X$", // $ hasParseFailure
"^((?:a{0,2|-)|\\w\\{\\d,\\d)+X$", // $ hasParseFailure
// GOOD
"^((?:a{0,2}|-)|\\w\\{\\d,\\d\\})+X$",
// NOT GOOD
"X(\\u0061|a)*Y", // $ hasExpRedos
// GOOD
"X(\\u0061|b)+Y",
// NOT GOOD
"X(\\x61|a)*Y", // $ hasExpRedos
// GOOD
"X(\\x61|b)+Y",
// NOT GOOD
"X(\\x{061}|a)*Y", // $ hasExpRedos
// GOOD
"X(\\x{061}|b)+Y",
// NOT GOOD
"X(\\p{Digit}|7)*Y", // $ hasExpRedos
// GOOD
"X(\\p{Digit}|b)+Y",
// NOT GOOD
"X(\\P{Digit}|b)*Y", // $ hasExpRedos
// GOOD
"X(\\P{Digit}|7)+Y",
// NOT GOOD
"X(\\p{IsDigit}|7)*Y", // $ hasExpRedos
// GOOD
"X(\\p{IsDigit}|b)+Y",
// NOT GOOD - but not detected
"X(\\p{Alpha}|a)*Y", // $ MISSING: hasExpRedos
// GOOD
"X(\\p{Alpha}|7)+Y",
// GOOD
"(\"[^\"]*?\"|[^\"\\s]+)+(?=\\s*|\\s*$)",
// BAD
"/(\"[^\"]*?\"|[^\"\\s]+)+(?=\\s*|\\s*$)X", // $ hasExpRedos
"/(\"[^\"]*?\"|[^\"\\s]+)+(?=X)", // $ hasExpRedos
// BAD
"\\A(\\d|0)*x", // $ hasExpRedos
"(\\d|0)*\\Z", // $ hasExpRedos
"\\b(\\d|0)*x", // $ hasExpRedos
// GOOD - possessive quantifiers don't backtrack
"(a*+)*+b",
"(a*)*+b",
"(a*+)*b",
// BAD
"(a*)*b", // $ hasExpRedos
// BAD - but not detected due to the way possessive quantifiers are approximated
"((aa|a*+)b)*c", // $ MISSING: hasExpRedos
// BAD - testing mode flag groups
"(?is)(a|aa?)*b" // $ hasExpRedos hasPrefixMsg= hasPump=a
};
void test() {
for (int i = 0; i < regs.length; i++) {
Pattern.compile(regs[i]);
}
}
}