refactor into more classes; add more test cases; add LITERAL sanitizer

This commit is contained in:
Jami Cogswell
2022-10-31 18:14:39 -04:00
parent 50d638d1b6
commit 91491d9a7b
5 changed files with 132 additions and 65 deletions

View File

@@ -2,6 +2,11 @@
private import semmle.code.java.dataflow.ExternalFlow
/** The class `java.util.regex.Pattern`. */
class TypeRegexPattern extends Class {
TypeRegexPattern() { this.hasQualifiedName("java.util.regex", "Pattern") }
}
private class RegexModel extends SummaryModelCsv {
override predicate row(string s) {
s =

View File

@@ -46,3 +46,8 @@ class TypeApacheSystemUtils extends Class {
this.hasQualifiedName(["org.apache.commons.lang", "org.apache.commons.lang3"], "SystemUtils")
}
}
/** The class `org.apache.commons.lang3.RegExUtils`. */
class TypeApacheRegExUtils extends Class {
TypeApacheRegExUtils() { this.hasQualifiedName("org.apache.commons.lang3", "RegExUtils") }
}

View File

@@ -1,60 +1,37 @@
/** Provides classes and predicates related to regex injection in Java. */
import java
import semmle.code.java.dataflow.FlowSources
import semmle.code.java.dataflow.TaintTracking
import semmle.code.java.regex.RegexFlowConfigs
private import semmle.code.java.dataflow.DataFlow
private import semmle.code.java.frameworks.Regex
private import semmle.code.java.frameworks.apache.Lang
/**
* A data flow sink for untrusted user input used to construct regular expressions.
*/
/** A data flow sink for untrusted user input used to construct regular expressions. */
abstract class Sink extends DataFlow::ExprNode { }
/**
* A sanitizer for untrusted user input used to construct regular expressions.
*/
/** A sanitizer for untrusted user input used to construct regular expressions. */
abstract class Sanitizer extends DataFlow::ExprNode { }
// TODO: look into further: Pattern.matcher, .pattern() and .toString() as taint steps, .split and .splitAsStream
/**
* A data flow sink for untrusted user input used to construct regular expressions.
*/
private class RegexSink extends Sink {
RegexSink() {
private class RegexInjectionSink extends Sink {
RegexInjectionSink() {
exists(MethodAccess ma, Method m | m = ma.getMethod() |
ma.getArgument(0) = this.asExpr() and
(
m.getDeclaringType() instanceof TypeString and
m.hasName(["matches", "split", "replaceFirst", "replaceAll"])
or
m.getDeclaringType() instanceof RegexPattern and
m.hasName(["compile", "matches"])
m instanceof StringRegexMethod or
m instanceof PatternRegexMethod
)
or
m.getDeclaringType() instanceof ApacheRegExUtils and
(
ma.getArgument(1) = this.asExpr() and
// only handles String param here because the other param option, Pattern, is already handled by `java.util.regex.Pattern` above
m.getParameterType(1) instanceof TypeString and
m.hasName([
"removeAll", "removeFirst", "removePattern", "replaceAll", "replaceFirst",
"replacePattern"
])
)
ma.getArgument(1) = this.asExpr() and
m instanceof ApacheRegExUtilsMethod
)
}
}
/**
* A call to a function whose name suggests that it escapes regular
* expression meta-characters.
*/
class RegexInjectionSanitizer extends Sanitizer {
/** A call to a function which escapes regular expression meta-characters. */
private class RegexInjectionSanitizer extends Sanitizer {
RegexInjectionSanitizer() {
// a function whose name suggests that it escapes regular expression meta-characters
exists(string calleeName, string sanitize, string regexp |
calleeName = this.asExpr().(Call).getCallee().getName() and
// TODO: add test case for sanitize? I think current tests only check escape
// TODO: should this be broader and only look for "escape|saniti[sz]e" and not "regexp?" as well? -- e.g. err on side of FNs?
sanitize = "(?:escape|saniti[sz]e)" and
regexp = "regexp?"
|
@@ -63,31 +40,70 @@ class RegexInjectionSanitizer extends Sanitizer {
".*)")
)
or
// adds Pattern.quote() as a sanitizer
// https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#quote-java.lang.String-: "Metacharacters or escape sequences in the input sequence will be given no special meaning."
// see https://rules.sonarsource.com/java/RSPEC-2631 and https://sensei.securecodewarrior.com/recipes/scw:java:regex-injection
// a call to the `Pattern.quote` method, which gives metacharacters or escape sequences no special meaning
exists(MethodAccess ma, Method m | m = ma.getMethod() |
m.getDeclaringType() instanceof RegexPattern and
(
ma.getArgument(0) = this.asExpr() and
m.hasName("quote")
)
ma.getArgument(0) = this.asExpr() and
m instanceof PatternQuoteMethod
)
or
// use of Pattern.LITERAL flag with `Pattern.compile` which gives metacharacters or escape sequences no special meaning
exists(MethodAccess ma, Method m, Field field | m = ma.getMethod() |
ma.getArgument(0) = this.asExpr() and
m instanceof PatternRegexMethod and
m.hasName("compile") and
//ma.getArgument(1).toString() = "Pattern.LITERAL" and
field instanceof PatternLiteral and
ma.getArgument(1) = field.getAnAccess()
)
}
}
// ******** HELPER CLASSES/METHODS (MAYBE MOVE ELSEWHERE?) ********
// TODO: move below to Regex.qll??
/** The Java class `java.util.regex.Pattern`. */
private class RegexPattern extends RefType {
RegexPattern() { this.hasQualifiedName("java.util.regex", "Pattern") }
/**
* The methods of the class `java.lang.String` that take a regular expression
* as a parameter.
*/
private class StringRegexMethod extends Method {
StringRegexMethod() {
this.getDeclaringType() instanceof TypeString and
this.hasName(["matches", "split", "replaceFirst", "replaceAll"])
}
}
// /** The Java class `java.util.regex.Matcher`. */
// private class RegexMatcher extends RefType {
// RegexMatcher() { this.hasQualifiedName("java.util.regex", "Matcher") }
// }
/** The Java class `org.apache.commons.lang3.RegExUtils`. */
private class ApacheRegExUtils extends RefType {
ApacheRegExUtils() { this.hasQualifiedName("org.apache.commons.lang3", "RegExUtils") }
/**
* The methods of the class `java.util.regex.Pattern` that take a regular
* expression as a parameter.
*/
private class PatternRegexMethod extends Method {
PatternRegexMethod() {
this.getDeclaringType() instanceof TypeRegexPattern and
this.hasName(["compile", "matches"])
}
}
/** The `quote` method of the `java.util.regex.Pattern` class. */
private class PatternQuoteMethod extends Method {
PatternQuoteMethod() { this.hasName(["quote"]) }
}
/** The `LITERAL` field of the `java.util.regex.Pattern` class. */
private class PatternLiteral extends Field {
PatternLiteral() {
this.getDeclaringType() instanceof TypeRegexPattern and
this.hasName("LITERAL")
}
}
/**
* The methods of the class `org.apache.commons.lang3.RegExUtils` that take
* a regular expression of type `String` as a parameter.
*/
private class ApacheRegExUtilsMethod extends Method {
ApacheRegExUtilsMethod() {
this.getDeclaringType() instanceof TypeApacheRegExUtils and
// only handles String param here because the other param option, Pattern, is already handled by `java.util.regex.Pattern`
this.getParameterType(1) instanceof TypeString and
this.hasName([
"removeAll", "removeFirst", "removePattern", "replaceAll", "replaceFirst", "replacePattern"
])
}
}

View File

@@ -5,9 +5,7 @@ import semmle.code.java.dataflow.FlowSources
import semmle.code.java.dataflow.TaintTracking
import semmle.code.java.security.RegexInjection
/**
* A taint-tracking configuration for untrusted user input used to construct regular expressions.
*/
/** A taint-tracking configuration for untrusted user input used to construct regular expressions. */
class RegexInjectionConfiguration extends TaintTracking::Configuration {
RegexInjectionConfiguration() { this = "RegexInjection" }

View File

@@ -84,20 +84,55 @@ public class RegexInjectionTest extends HttpServlet {
String pattern = request.getParameter("pattern");
String input = request.getParameter("input");
// TODO: add a "Safe" test for situation when this return value is stored in a variable, then the variable is used in the regex
escapeSpecialRegexChars(pattern);
// BAD: the pattern is not really sanitized
return input.matches("^" + pattern + "=.*$"); // $ hasRegexInjection
}
public boolean pattern7(javax.servlet.http.HttpServletRequest request) {
String pattern = request.getParameter("pattern");
String input = request.getParameter("input");
String escapedPattern = escapeSpecialRegexChars(pattern);
// Safe: User input is sanitized before constructing the regex
return input.matches("^" + escapedPattern + "=.*$");
}
public boolean pattern8(javax.servlet.http.HttpServletRequest request) {
String pattern = request.getParameter("pattern");
String input = request.getParameter("input");
// Safe: User input is sanitized before constructing the regex
return input.matches("^" + sanitizeSpecialRegexChars(pattern) + "=.*$");
}
public boolean pattern9(javax.servlet.http.HttpServletRequest request) {
String pattern = request.getParameter("pattern");
String input = request.getParameter("input");
// Safe: User input is sanitized before constructing the regex
return input.matches("^" + sanitiseSpecialRegexChars(pattern) + "=.*$");
}
Pattern SPECIAL_REGEX_CHARS = Pattern.compile("[{}()\\[\\]><-=!.+*?^$\\\\|]");
// TODO: check if any other inbuilt escape/sanitizers in Java APIs
// test `escape...regex`
String escapeSpecialRegexChars(String str) {
return SPECIAL_REGEX_CHARS.matcher(str).replaceAll("\\\\$0");
}
// test `sanitize...regex`
String sanitizeSpecialRegexChars(String str) {
return SPECIAL_REGEX_CHARS.matcher(str).replaceAll("\\\\$0");
}
// test `sanitise...regex`
String sanitiseSpecialRegexChars(String str) {
return SPECIAL_REGEX_CHARS.matcher(str).replaceAll("\\\\$0");
}
public boolean apache1(javax.servlet.http.HttpServletRequest request) {
String pattern = request.getParameter("pattern");
String input = request.getParameter("input");
@@ -148,12 +183,20 @@ public class RegexInjectionTest extends HttpServlet {
return RegExUtils.replacePattern(input, pattern, "").length() > 0; // $ hasRegexInjection
}
// from https://rules.sonarsource.com/java/RSPEC-2631 to test for Pattern.quote as safe
public boolean validate(javax.servlet.http.HttpServletRequest request) {
// test `Pattern.quote` as safe
public boolean quoteTest(javax.servlet.http.HttpServletRequest request) {
String regex = request.getParameter("regex");
String input = request.getParameter("input");
return input.matches(Pattern.quote(regex)); // Safe: with Pattern.quote, metacharacters or escape sequences will be given no special meaning
return input.matches(Pattern.quote(regex)); // Safe
}
// test `Pattern.LITERAL` as safe
public boolean literalTest(javax.servlet.http.HttpServletRequest request) {
String pattern = request.getParameter("pattern");
String input = request.getParameter("input");
return Pattern.compile(pattern, Pattern.LITERAL).matcher(input).matches(); // Safe
}
}