Merge pull request #7723 from joefarebrother/redos

Java: Add ReDoS queries
This commit is contained in:
Joe Farebrother
2022-05-12 13:50:38 +01:00
committed by GitHub
39 changed files with 5715 additions and 59 deletions

View File

@@ -0,0 +1,439 @@
import java.util.regex.Pattern;
/**
 * Fixture of regular expressions for the exponential-backtracking (ReDoS) test.
 *
 * Every string in {@code regs} is compiled by {@code test()}. A trailing
 * {@code // $ hasExpRedos} or {@code // $ hasParseFailure} comment records the
 * result expected on that line; these trailing comments are test data and must
 * stay on the same line as the literal they annotate.
 */
class ExpRedosTest {
// Conventions used in the comments below:
// - "GOOD" precedes a pattern that should not be reported; "NOT GOOD"/"BAD" precedes one that should.
// - "MISSING:" marks a pattern that should be reported but currently is not (false negative).
// - "SPURIOUS:" marks a pattern that is reported although it should not be (false positive).
static String[] regs = {
// NOT GOOD; attack: "_" + "__".repeat(100)
// Adapted from marked (https://github.com/markedjs/marked), which is licensed
// under the MIT license; see file marked-LICENSE.
"^\\b_((?:__|[\\s\\S])+?)_\\b|^\\*((?:\\*\\*|[\\s\\S])+?)\\*(?!\\*)", // $ hasExpRedos
// GOOD
// Adapted from marked (https://github.com/markedjs/marked), which is licensed
// under the MIT license; see file marked-LICENSE.
"^\\b_((?:__|[^_])+?)_\\b|^\\*((?:\\*\\*|[^*])+?)\\*(?!\\*)",
// GOOD - there is no witness in the end that could cause the regexp to not match
// Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
// which is licensed under the MIT license; see file brace-expansion-LICENSE.
"(.*,)+.+",
// NOT GOOD; attack: " '" + "\\\\".repeat(100)
// Adapted from CodeMirror (https://github.com/codemirror/codemirror),
// which is licensed under the MIT license; see file CodeMirror-LICENSE.
"^(?:\\s+(?:\"(?:[^\"\\\\]|\\\\\\\\|\\\\.)+\"|'(?:[^'\\\\]|\\\\\\\\|\\\\.)+'|\\((?:[^)\\\\]|\\\\\\\\|\\\\.)+\\)))?", // $ hasExpRedos
// GOOD
// Adapted from lulucms2 (https://github.com/yiifans/lulucms2).
"\\(\\*(?:[\\s\\S]*?\\(\\*[\\s\\S]*?\\*\\))*[\\s\\S]*?\\*\\)",
// GOOD
// Adapted from jest (https://github.com/facebook/jest), which is licensed
// under the MIT license; see file jest-LICENSE.
"^ *(\\S.*\\|.*)\\n *([-:]+ *\\|[-| :]*)\\n((?:.*\\|.*(?:\\n|$))*)\\n*",
// NOT GOOD, variant of the GOOD pattern directly above (it ends in `a` instead of `\n*`); attack: "a|\n:|\n" + "||\n".repeat(100)
"^ *(\\S.*\\|.*)\\n *([-:]+ *\\|[-| :]*)\\n((?:.*\\|.*(?:\\n|$))*)a", // $ hasExpRedos
// NOT GOOD; attack: "/" + "\\/a".repeat(100)
// Adapted from ANodeBlog (https://github.com/gefangshuai/ANodeBlog),
// which is licensed under the Apache License 2.0; see file ANodeBlog-LICENSE.
"\\/(?![ *])(\\\\\\/|.)*?\\/[gim]*(?=\\W|$)", // $ hasExpRedos
// NOT GOOD; attack: "##".repeat(100) + "\na"
// Adapted from CodeMirror (https://github.com/codemirror/codemirror),
// which is licensed under the MIT license; see file CodeMirror-LICENSE.
"^([\\s\\[\\{\\(]|#.*)*$", // $ hasExpRedos
// GOOD
"(\\r\\n|\\r|\\n)+",
// NOTE(review): the PoC comment on the next line describes a regexp that does not appear in this
// list; it looks like a leftover from the test this file was adapted from - confirm and remove or
// add the corresponding pattern.
// BAD - PoC: `node -e "/((?:[^\"\']|\".*?\"|\'.*?\')*?)([(,)]|$)/.test(\"'''''''''''''''''''''''''''''''''''''''''''''\\\"\");"`. It's complicated though, because the regexp still matches something, it just matches the empty-string after the attack string.
// NOT GOOD; attack: "a" + "[]".repeat(100) + ".b\n"
// Adapted from Knockout (https://github.com/knockout/knockout), which is
// licensed under the MIT license; see file knockout-LICENSE
"^[\\_$a-z][\\_$a-z0-9]*(\\[.*?\\])*(\\.[\\_$a-z][\\_$a-z0-9]*(\\[.*?\\])*)*$", // $ hasExpRedos
// GOOD
"(a|.)*",
// Testing the NFA - only some of the below are detected.
// NOTE(review): all four patterns below are marked as detected, so the remark above looks stale - confirm.
"^([a-z]+)+$", // $ hasExpRedos
"^([a-z]*)*$", // $ hasExpRedos
"^([a-zA-Z0-9])(([\\\\-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$", // $ hasExpRedos
"^(([a-z])+.)+[A-Z]([a-z])+$", // $ hasExpRedos
// NOT GOOD; attack: "[" + "][".repeat(100) + "]!"
// Adapted from Prototype.js (https://github.com/prototypejs/prototype), which
// is licensed under the MIT license; see file Prototype.js-LICENSE.
"(([\\w#:.~>+()\\s-]+|\\*|\\[.*?\\])+)\\s*(,|$)", // $ hasExpRedos
// NOT GOOD; attack: "'" + "\\a".repeat(100) + '"'
// Adapted from Prism (https://github.com/PrismJS/prism), which is licensed
// under the MIT license; see file Prism-LICENSE.
"(\"|')(\\\\?.)*?\\1", // $ hasExpRedos
// NOT GOOD
"(b|a?b)*c", // $ hasExpRedos
// NOT GOOD
"(a|aa?)*b", // $ hasExpRedos
// GOOD
"(.|\\n)*!",
// NOT GOOD; attack: "\n".repeat(100) + "."
"(?s)(.|\\n)*!", // $ hasExpRedos
// GOOD
"([\\w.]+)*",
// NOT GOOD
"(a|aa?)*b", // $ hasExpRedos
// NOT GOOD
"(([\\s\\S]|[^a])*)\"", // $ hasExpRedos
// GOOD - there is no witness in the end that could cause the regexp to not match
"([^\"']+)*",
// NOT GOOD
"((.|[^a])*)\"", // $ hasExpRedos
// GOOD
"((a|[^a])*)\"",
// NOT GOOD
"((b|[^a])*)\"", // $ hasExpRedos
// NOT GOOD
"((G|[^a])*)\"", // $ hasExpRedos
// NOT GOOD
"(([0-9]|[^a])*)\"", // $ hasExpRedos
// NOT GOOD
"(?:=(?:([!#\\$%&'\\*\\+\\-\\.\\^_`\\|~0-9A-Za-z]+)|\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"))?", // $ MISSING: hasExpRedos
// NOT GOOD
"\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"", // $ MISSING: hasExpRedos
// GOOD
"\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"",
// NOT GOOD
"(([a-z]|[d-h])*)\"", // $ hasExpRedos
// NOT GOOD
"(([^a-z]|[^0-9])*)\"", // $ hasExpRedos
// NOT GOOD
"((\\d|[0-9])*)\"", // $ hasExpRedos
// NOT GOOD
"((\\s|\\s)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\w|G)*)\"", // $ hasExpRedos
// GOOD
"((\\s|\\d)*)\"",
// NOT GOOD
"((\\d|\\w)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\d|5)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\s|[\\f])*)\"", // $ hasExpRedos
// NOT GOOD - but not detected (likely because \v is a character class in Java rather than a specific character in other langs)
"((\\s|[\\v]|\\\\v)*)\"", // $ MISSING: hasExpRedos
// NOT GOOD
"((\\f|[\\f])*)\"", // $ hasExpRedos
// NOT GOOD
"((\\W|\\D)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\S|\\w)*)\"", // $ hasExpRedos
// NOT GOOD
"((\\S|[\\w])*)\"", // $ hasExpRedos
// NOT GOOD
"((1s|[\\da-z])*)\"", // $ hasExpRedos
// NOT GOOD
"((0|[\\d])*)\"", // $ hasExpRedos
// NOT GOOD
"(([\\d]+)*)\"", // $ hasExpRedos
// GOOD - there is no witness in the end that could cause the regexp to not match
"(\\d+(X\\d+)?)+",
// GOOD - there is no witness in the end that could cause the regexp to not match
"([0-9]+(X[0-9]*)?)*",
// GOOD
"^([^>]+)*(>|$)",
// NOT GOOD
"^([^>a]+)*(>|$)", // $ hasExpRedos
// NOT GOOD
"(\\n\\s*)+$", // $ hasExpRedos
// NOT GOOD
"^(?:\\s+|#.*|\\(\\?#[^)]*\\))*(?:[?*+]|\\{\\d+(?:,\\d*)?})", // $ hasExpRedos
// NOT GOOD
"\\{\\[\\s*([a-zA-Z]+)\\(([a-zA-Z]+)\\)((\\s*([a-zA-Z]+)\\: ?([ a-zA-Z{}]+),?)+)*\\s*\\]\\}", // $ hasExpRedos
// NOT GOOD
"(a+|b+|c+)*c", // $ hasExpRedos
// NOT GOOD
"(((a+a?)*)+b+)", // $ hasExpRedos
// NOT GOOD
"(a+)+bbbb", // $ hasExpRedos
// GOOD
"(a+)+aaaaa*a+",
// NOT GOOD
"(a+)+aaaaa$", // $ hasExpRedos
// GOOD
"(\\n+)+\\n\\n",
// NOT GOOD
"(\\n+)+\\n\\n$", // $ hasExpRedos
// NOT GOOD
"([^X]+)*$", // $ hasExpRedos
// NOT GOOD
"(([^X]b)+)*$", // $ hasExpRedos
// GOOD
"(([^X]b)+)*($|[^X]b)",
// NOT GOOD
"(([^X]b)+)*($|[^X]c)", // $ hasExpRedos
// GOOD
"((ab)+)*ababab",
// GOOD
"((ab)+)*abab(ab)*(ab)+",
// GOOD
"((ab)+)*",
// NOT GOOD
"((ab)+)*$", // $ hasExpRedos
// GOOD
"((ab)+)*[a1][b1][a2][b2][a3][b3]",
// NOT GOOD
"([\\n\\s]+)*(.)", // $ hasExpRedos
// GOOD - any witness passes through the accept state.
"(A*A*X)*",
// GOOD
"([^\\\\\\]]+)*",
// NOT GOOD
"(\\w*foobarbaz\\w*foobarbaz\\w*foobarbaz\\w*foobarbaz\\s*foobarbaz\\d*foobarbaz\\w*)+-", // $ hasExpRedos
// NOT GOOD
"(.thisisagoddamnlongstringforstresstestingthequery|\\sthisisagoddamnlongstringforstresstestingthequery)*-", // $ hasExpRedos
// NOT GOOD
"(thisisagoddamnlongstringforstresstestingthequery|this\\w+query)*-", // $ hasExpRedos
// GOOD
"(thisisagoddamnlongstringforstresstestingthequery|imanotherbutunrelatedstringcomparedtotheotherstring)*-",
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
"foo([\uDC66\uDC67]|[\uDC68\uDC69])*foo", // $ SPURIOUS: hasExpRedos
// GOOD (but false positive caused by the extractor converting all four unpaired surrogates to \uFFFD)
"foo((\uDC66|\uDC67)|(\uDC68|\uDC69))*foo", // $ SPURIOUS: hasExpRedos
// NOT GOOD (but cannot currently construct a prefix)
"a{2,3}(b+)+X", // $ hasExpRedos
// NOT GOOD (and a good prefix test)
"^<(\\w+)((?:\\s+\\w+(?:\\s*=\\s*(?:(?:\"[^\"]*\")|(?:'[^']*')|[^>\\s]+))?)*)\\s*(\\/?)>", // $ hasExpRedos
// GOOD
"(a+)*[\\s\\S][\\s\\S][\\s\\S]?",
// GOOD - but we fail to see that repeating the attack string ends in the "accept any" state (due to not parsing the range `[\s\S]{2,3}`).
"(a+)*[\\s\\S]{2,3}", // $ SPURIOUS: hasExpRedos
// GOOD - but we spuriously conclude that a rejecting suffix exists (due to not parsing the range `[\s\S]{2,}` when constructing the NFA).
"(a+)*([\\s\\S]{2,}|X)$", // $ SPURIOUS: hasExpRedos
// GOOD
"(a+)*([\\s\\S]*|X)$",
// NOT GOOD
"((a+)*$|[\\s\\S]+)", // $ hasExpRedos
// GOOD - but still flagged. The only change compared to the above is the order of alternatives, which we don't model.
"([\\s\\S]+|(a+)*$)", // $ SPURIOUS: hasExpRedos
// GOOD
"((;|^)a+)+$",
// NOT GOOD (a good prefix test)
"(^|;)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(e+)+f", // $ hasExpRedos
// NOT GOOD
"^ab(c+)+$", // $ hasExpRedos
// NOT GOOD
"(\\d(\\s+)*){20}", // $ hasExpRedos
// GOOD - but we spuriously conclude that a rejecting suffix exists.
"(([^/]|X)+)(\\/[\\s\\S]*)*$", // $ SPURIOUS: hasExpRedos
// GOOD - but we spuriously conclude that a rejecting suffix exists.
"^((x([^Y]+)?)*(Y|$))", // $ SPURIOUS: hasExpRedos
// NOT GOOD
"(a*)+b", // $ hasExpRedos
// NOT GOOD
"foo([\\w-]*)+bar", // $ hasExpRedos
// NOT GOOD
"((ab)*)+c", // $ hasExpRedos
// NOT GOOD
"(a?a?)*b", // $ hasExpRedos
// GOOD
"(a?)*b",
// NOT GOOD - but not detected
"(c?a?)*b", // $ MISSING: hasExpRedos
// NOT GOOD
"(?:a|a?)+b", // $ hasExpRedos
// NOT GOOD - but not detected.
"(a?b?)*$", // $ MISSING: hasExpRedos
// NOT GOOD
"PRE(([a-c]|[c-d])T(e?e?e?e?|X))+(cTcT|cTXcTX$)", // $ hasExpRedos
// NOT GOOD
"^((a)+\\w)+$", // $ hasExpRedos
// NOT GOOD
"^(b+.)+$", // $ hasExpRedos
// GOOD
"a*b",
// All 4 bad combinations of nested * and +
"(a*)*b", // $ hasExpRedos
"(a+)*b", // $ hasExpRedos
"(a*)+b", // $ hasExpRedos
"(a+)+b", // $ hasExpRedos
// GOOD
"(a|b)+",
"(?:[\\s;,\"'<>(){}|\\[\\]@=+*]|:(?![/\\\\]))+",
// The next four patterns each contain an unclosed `{` after `a`; they are expected to be
// reported as parse failures rather than as ReDoS results.
"^((?:a{|-)|\\w\\{)+X$", // $ hasParseFailure
"^((?:a{0|-)|\\w\\{\\d)+X$", // $ hasParseFailure
"^((?:a{0,|-)|\\w\\{\\d,)+X$", // $ hasParseFailure
"^((?:a{0,2|-)|\\w\\{\\d,\\d)+X$", // $ hasParseFailure
// GOOD
"^((?:a{0,2}|-)|\\w\\{\\d,\\d\\})+X$",
// NOT GOOD
"X(\\u0061|a)*Y", // $ hasExpRedos
// GOOD
"X(\\u0061|b)+Y",
// NOT GOOD
"X(\\x61|a)*Y", // $ hasExpRedos
// GOOD
"X(\\x61|b)+Y",
// NOT GOOD
"X(\\x{061}|a)*Y", // $ hasExpRedos
// GOOD
"X(\\x{061}|b)+Y",
// NOT GOOD
"X(\\p{Digit}|7)*Y", // $ hasExpRedos
// GOOD
"X(\\p{Digit}|b)+Y",
// NOT GOOD
"X(\\P{Digit}|b)*Y", // $ hasExpRedos
// GOOD
"X(\\P{Digit}|7)+Y",
// NOT GOOD
"X(\\p{IsDigit}|7)*Y", // $ hasExpRedos
// GOOD
"X(\\p{IsDigit}|b)+Y",
// NOT GOOD - but not detected
"X(\\p{Alpha}|a)*Y", // $ MISSING: hasExpRedos
// GOOD
"X(\\p{Alpha}|7)+Y",
// GOOD
"(\"[^\"]*?\"|[^\"\\s]+)+(?=\\s*|\\s*$)",
// BAD
// NOTE(review): the leading `/` in the next two patterns is matched literally; it looks like a
// leftover regex-literal delimiter - confirm it is intended.
"/(\"[^\"]*?\"|[^\"\\s]+)+(?=\\s*|\\s*$)X", // $ hasExpRedos
"/(\"[^\"]*?\"|[^\"\\s]+)+(?=X)", // $ hasExpRedos
// BAD
"\\A(\\d|0)*x", // $ hasExpRedos
"(\\d|0)*\\Z", // $ hasExpRedos
"\\b(\\d|0)*x", // $ hasExpRedos
// GOOD - possessive quantifiers don't backtrack
"(a*+)*+b",
"(a*)*+b",
"(a*+)*b",
// BAD
"(a*)*b", // $ hasExpRedos
// BAD - but not detected due to the way possessive quantifiers are approximated
"((aa|a*+)b)*c" // $ MISSING: hasExpRedos
};
// Compiles every entry of `regs` with Pattern.compile, so each literal above is used as a regular expression.
void test() {
for (int i = 0; i < regs.length; i++) {
Pattern.compile(regs[i]);
}
}
}

View File

@@ -0,0 +1,84 @@
import java.util.regex.Pattern;
import java.util.function.Predicate;
import javax.servlet.http.HttpServletRequest;
import com.google.common.base.Splitter;
/**
 * Fixture for the polynomial ReDoS test: values read from an
 * {@code HttpServletRequest} are passed to various regex-matching APIs.
 *
 * A trailing {@code // $ hasPolyRedos} comment marks a statement that is
 * expected to be reported; statements without it are expected not to be.
 * The exact form of each statement is test data and should not be refactored.
 */
class PolyRedosTest {
// Exercises the different JDK and Guava entry points through which a pattern can be applied to a tainted string.
void test(HttpServletRequest request) {
String tainted = request.getParameter("inp");
// `\d+E?\d+` can split a run of digits between the two `\d+` in many ways, and the trailing `!` lets the match fail.
String reg = "0\\.\\d+E?\\d+!";
// Predicate that does not involve any regex; used only to build combined predicates below.
Predicate<String> dummyPred = (s -> s.length() % 7 == 0);
tainted.matches(reg); // $ hasPolyRedos
tainted.split(reg); // $ hasPolyRedos
tainted.split(reg, 7); // $ hasPolyRedos
tainted.replaceAll(reg, "a"); // $ hasPolyRedos
tainted.replaceFirst(reg, "a"); // $ hasPolyRedos
Pattern.matches(reg, tainted); // $ hasPolyRedos
Pattern.compile(reg).matcher(tainted).matches(); // $ hasPolyRedos
Pattern.compile(reg).split(tainted); // $ hasPolyRedos
Pattern.compile(reg, Pattern.DOTALL).split(tainted); // $ hasPolyRedos
Pattern.compile(reg).split(tainted, 7); // $ hasPolyRedos
Pattern.compile(reg).splitAsStream(tainted); // $ hasPolyRedos
Pattern.compile(reg).asPredicate().test(tainted); // $ hasPolyRedos
// The pattern predicate is still expected to be tracked through negate/and/or/not combinators.
Pattern.compile(reg).asMatchPredicate().negate().and(dummyPred).or(dummyPred).test(tainted); // $ hasPolyRedos
Predicate.not(dummyPred.and(dummyPred.or(Pattern.compile(reg).asPredicate()))).test(tainted); // $ hasPolyRedos
Splitter.on(Pattern.compile(reg)).split(tainted); // $ hasPolyRedos
// No result expected: Splitter.on(String) takes a literal separator, not a pattern.
Splitter.on(reg).split(tainted);
Splitter.onPattern(reg).split(tainted); // $ hasPolyRedos
Splitter.onPattern(reg).splitToList(tainted); // $ hasPolyRedos
Splitter.onPattern(reg).limit(7).omitEmptyStrings().trimResults().split(tainted); // $ hasPolyRedos
Splitter.onPattern(reg).withKeyValueSeparator(" => ").split(tainted); // $ hasPolyRedos
// No result expected: here `reg` is passed as a String key/value separator and no pattern-based splitter is involved.
Splitter.on(";").withKeyValueSeparator(reg).split(tainted);
Splitter.on(";").withKeyValueSeparator(Splitter.onPattern(reg)).split(tainted); // $ hasPolyRedos
}
// Same pattern shape used with matches() (no result expected) and find() (result expected).
// Presumably the leading `.*` is only a problem when the match may start at any offset - TODO confirm.
void test2(HttpServletRequest request) {
String tainted = request.getParameter("inp");
Pattern p1 = Pattern.compile(".*a");
Pattern p2 = Pattern.compile(".*b");
p1.matcher(tainted).matches();
p2.matcher(tainted).find(); // $ hasPolyRedos
}
// Converse of test2: here matches() is expected to be reported and find() is not.
// Presumably `b*b*` only backtracks when the whole input has to match - TODO confirm.
void test3(HttpServletRequest request) {
String tainted = request.getParameter("inp");
Pattern p1 = Pattern.compile("ab*b*");
Pattern p2 = Pattern.compile("cd*d*");
p1.matcher(tainted).matches(); // $ hasPolyRedos
p2.matcher(tainted).find();
}
// Same distinction as test2, using the String convenience methods with inline pattern literals.
void test4(HttpServletRequest request) {
String tainted = request.getParameter("inp");
tainted.matches(".*a");
tainted.replaceAll(".*b", "c"); // $ hasPolyRedos
}
// Patterns stored in static fields, used by test5.
static Pattern p3 = Pattern.compile(".*a");
static Pattern p4 = Pattern.compile(".*b");
// Same distinction as test2, via asMatchPredicate() (no result expected) and asPredicate() (result expected).
void test5(HttpServletRequest request) {
String tainted = request.getParameter("inp");
p3.asMatchPredicate().test(tainted);
p4.asPredicate().test(tainted); // $ hasPolyRedos
}
// One pattern applied to four different request-derived values; only the request parameter is expected to be reported.
// NOTE(review): why the header, URI and cookie name are not reported is not visible here - confirm against the query's source definition.
void test6(HttpServletRequest request) {
Pattern p = Pattern.compile("^a*a*$");
p.matcher(request.getParameter("inp")).matches(); // $ hasPolyRedos
p.matcher(request.getHeader("If-None-Match")).matches();
p.matcher(request.getRequestURI()).matches();
p.matcher(request.getCookies()[0].getName()).matches();
}
}

View File

@@ -0,0 +1,19 @@
import java
import TestUtilities.InlineExpectationsTest
import semmle.code.java.security.performance.PolynomialReDoSQuery
/**
 * Inline-expectations test for polynomial ReDoS: produces a `hasPolyRedos`
 * result (with an empty value) at the sink of every polynomial ReDoS result.
 */
class HasPolyRedos extends InlineExpectationsTest {
  HasPolyRedos() { this = "HasPolyRedos" }

  override string getARelevantTag() { result = "hasPolyRedos" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    // Only the sink matters for the expectation; the source and the regexp term are ignored.
    exists(DataFlow::PathNode sink |
      hasPolynomialReDoSResult(_, sink, _) and
      element = sink.getNode().toString() and
      location = sink.getNode().getLocation()
    ) and
    tag = "hasPolyRedos" and
    value = ""
  }
}

View File

@@ -0,0 +1,29 @@
import java
import TestUtilities.InlineExpectationsTest
import semmle.code.java.security.performance.ExponentialBackTracking
import semmle.code.java.regex.regex
/**
 * Inline-expectations test for exponential ReDoS. Produces:
 * - `hasExpRedos` at every regexp term with a ReDoS result, unless the
 *   enclosing regex has the `VERBOSE` mode;
 * - `hasParseFailure` at every regex that failed to parse.
 * Both kinds of result carry an empty value.
 */
class HasExpRedos extends InlineExpectationsTest {
  HasExpRedos() { this = "HasExpRedos" }

  override string getARelevantTag() { result = "hasExpRedos" or result = "hasParseFailure" }

  override predicate hasActualResult(Location location, string element, string tag, string value) {
    value = "" and
    (
      tag = "hasExpRedos" and
      // The pump string, state and prefix message of the result are not needed here.
      exists(RegExpTerm term |
        hasReDoSResult(term, _, _, _) and
        not term.getRegex().getAMode() = "VERBOSE" and
        element = term.toString() and
        location = term.getLocation()
      )
      or
      tag = "hasParseFailure" and
      exists(Regex regex |
        regex.failedToParse(_) and
        element = regex.toString() and
        location = regex.getLocation()
      )
    )
  }
}

View File

@@ -0,0 +1 @@
// semmle-extractor-options: --javac-args -cp ${testdir}/../../../stubs/servlet-api-2.4:${testdir}/../../../stubs/guava-30.0