Merge pull request #21310 from owen-mc/java/regex-execution

Java: Add RegexMatch concept and recognise `@Pattern` annotation as sanitizer
This commit is contained in:
Owen Mansel-Chan
2026-02-16 09:11:47 +00:00
committed by GitHub
17 changed files with 857 additions and 392 deletions

View File

@@ -0,0 +1,75 @@
/**
* Provides abstract classes representing generic concepts such as file system
* access or system command execution, for which individual framework libraries
* provide concrete subclasses.
*/
overlay[local?]
module;
import java
/**
* A module importing the frameworks that implement `RegexMatch`es,
* ensuring that they are visible to the concepts library.
*
* Nothing imported here is referenced by name inside this module; the
* imports exist only for the reason stated above.
*/
private module Frameworks {
private import semmle.code.java.JDK
// NOTE(review): the models for `java.util.regex` calls are not imported here
// directly - confirm that they are reachable through one of these imports.
private import semmle.code.java.frameworks.JavaxAnnotations
}
/**
 * An expression that performs a regular expression match.
 *
 * This class is the place to refine API models that already exist. To model
 * an API that is not yet covered, extend `RegexMatch::Range` instead.
 *
 * Instances are either method calls that return `true` on a successful match,
 * or annotations, whose mere presence is treated as a successful match.
 */
class RegexMatch extends Expr instanceof RegexMatch::Range {
  /** Gets the expression that supplies the regular expression used by this match. */
  Expr getRegex() { super.getRegex() = result }

  /** Gets an expression for the string that is searched or matched against. */
  Expr getString() { super.getString() = result }

  /**
   * Gets an expression that is sanitized by this match: either the matched
   * string or an additional expression reported by the underlying range.
   */
  Expr getASanitizedExpr() {
    result = this.getString()
    or
    result = super.getAdditionalSanitizedExpr()
  }

  /**
   * Gets a display name for this match, usually the name of the method that
   * performs it. It is intended for alert messages, so it should be the
   * type-qualified name where possible.
   */
  string getName() { super.getName() = result }
}
/** Provides classes for modeling regular-expression execution APIs. */
module RegexMatch {
/**
* An expression that executes a regular expression.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `RegexMatch` instead.
*
* These are either method calls, which return `true` when there is a match, or
* annotations, which are considered to match if they are present.
*
* Subclasses must implement `getRegex`, `getString` and `getName`;
* `getAdditionalSanitizedExpr` is optional.
*/
abstract class Range extends Expr {
/** Gets the expression for the regex being executed by this node. */
abstract Expr getRegex();
/** Gets an expression for the string to be searched or matched against. */
abstract Expr getString();
/**
* Gets an additional expression to be sanitized, if any.
*
* The default implementation has no result. Override it when a successful
* match should sanitize an expression other than the one returned by
* `getString`; `RegexMatch.getASanitizedExpr` returns both.
*/
Expr getAdditionalSanitizedExpr() { none() }
/**
* Gets the name of this regex match, typically the name of an executing
* method. This is used for nice alert messages and should include the
* type-qualified name if possible.
*/
abstract string getName();
}
}

View File

@@ -48,13 +48,19 @@ class StringContainsMethod extends Method {
}
/** A call to the `java.lang.String.matches` method. */
class StringMatchesCall extends MethodCall {
class StringMatchesCall extends MethodCall, RegexMatch::Range {
StringMatchesCall() {
exists(Method m | m = this.getMethod() |
m.getDeclaringType() instanceof TypeString and
m.hasName("matches")
)
}
/** Gets the regular expression, which is the first argument of this call. */
override Expr getRegex() { result = this.getArgument(0) }
/** Gets the string being matched, which is the qualifier of this call. */
override Expr getString() { result = this.getQualifier() }
/** Gets the fixed display name `String.matches`. */
override string getName() { result = "String.matches" }
}
/** A call to the `java.lang.String.replaceAll` method. */

View File

@@ -163,3 +163,38 @@ class WebServiceAnnotation extends Annotation {
class WebServiceRefAnnotation extends Annotation {
  WebServiceRefAnnotation() {
    // Match on the annotation's own type: `javax.xml.ws.WebServiceRef`.
    exists(RefType annotationType | annotationType = this.getType() |
      annotationType.hasQualifiedName("javax.xml.ws", "WebServiceRef")
    )
  }
}
/*
* Annotations in the package `javax.validation.constraints`.
*/
/**
 * A `@javax.validation.constraints.Pattern` or
 * `@jakarta.validation.constraints.Pattern` annotation.
 *
 * The annotation is modeled as a regular expression match: the regex is the
 * value of its `regexp` element, and the matched strings are the expressions
 * that read the annotated field, parameter or method result.
 */
class PatternAnnotation extends Annotation, RegexMatch::Range {
  PatternAnnotation() {
    this.getType()
        .hasQualifiedName(["javax.validation.constraints", "jakarta.validation.constraints"],
          "Pattern")
  }

  /** Gets the value of this annotation's `regexp` element. */
  override Expr getRegex() { result = this.getValue("regexp") }

  /** Gets an expression whose value is constrained by this annotation. */
  override Expr getString() {
    // Annotation on field accessed by direct read - value of field will match regexp
    result.(FieldRead).getField() = this.getAnnotatedElement()
    or
    // Annotation on field accessed by getter - value of field will match regexp
    result.(MethodCall).getMethod().(GetterMethod).getField() = this.getAnnotatedElement()
    or
    // Annotation on parameter - value of parameter will match regexp
    result.(VarRead).getVariable().(Parameter) = this.getAnnotatedElement()
    or
    // Annotation on method - return value of method will match regexp
    result.(Call).getCallee() = this.getAnnotatedElement()
    // TODO - we could also consider the case where the annotation is on a type
    // but this is harder to model and not very common.
  }

  /**
   * Gets a description of this annotation for use in alert messages.
   *
   * The name is built from the annotation's actual type so that the `jakarta`
   * variant is not reported as `javax`. For the `javax` variant the result is
   * `@javax.validation.constraints.Pattern annotation`.
   */
  override string getName() {
    result = "@" + this.getType().getQualifiedName() + " annotation"
  }
}

View File

@@ -3,6 +3,7 @@ overlay[local?]
module;
import java
private import semmle.code.java.dataflow.DataFlow
/** The class `java.util.regex.Matcher`. */
class TypeRegexMatcher extends Class {
@@ -24,6 +25,16 @@ class TypeRegexPattern extends Class {
TypeRegexPattern() { this.hasQualifiedName("java.util.regex", "Pattern") }
}
/**
 * A method named `compile` that is declared on `java.util.regex.Pattern`.
 */
class PatternCompileMethod extends Method {
  PatternCompileMethod() {
    this.getName() = "compile" and
    exists(TypeRegexPattern patternType | patternType = this.getDeclaringType())
  }
}
/**
* The `matches` method of `java.util.regex.Pattern`.
*/
@@ -59,3 +70,66 @@ class PatternLiteralField extends Field {
this.hasName("LITERAL")
}
}
/** A method call whose target is the `compile` method of `java.util.regex.Pattern`. */
class PatternCompileCall extends MethodCall {
  PatternCompileCall() { exists(PatternCompileMethod target | target = this.getMethod()) }
}
/** A method call whose target is the `matcher` method of `java.util.regex.Pattern`. */
class PatternMatcherCall extends MethodCall {
  PatternMatcherCall() { exists(PatternMatcherMethod target | target = this.getMethod()) }
}
/**
 * A method call whose target is the `matches` method of
 * `java.util.regex.Pattern`.
 */
class PatternMatchesCall extends MethodCall, RegexMatch::Range {
  PatternMatchesCall() { exists(PatternMatchesMethod target | target = this.getMethod()) }

  /** Gets the regular expression, which is the first argument of this call. */
  override Expr getRegex() { this.getArgument(0) = result }

  /** Gets the string being matched, which is the second argument of this call. */
  override Expr getString() { this.getArgument(1) = result }

  /** Gets the fixed display name `Pattern.matches`. */
  override string getName() { "Pattern.matches" = result }
}
/** A call to the `matches` method of `java.util.regex.Matcher`. */
class MatcherMatchesCall extends MethodCall, RegexMatch::Range {
  MatcherMatchesCall() { this.getMethod() instanceof MatcherMatchesMethod }

  /**
   * Gets the call to `java.util.regex.Pattern.matcher` that returned the
   * qualifier of this call. This is needed to determine the string being
   * matched.
   */
  PatternMatcherCall getPatternMatcherCall() {
    DataFlow::localExprFlow(result, this.getQualifier())
  }

  /**
   * Gets the call to `java.util.regex.Pattern.compile` that returned the
   * `Pattern` used by this matcher. This is needed to determine the regular
   * expression being used.
   */
  PatternCompileCall getPatternCompileCall() {
    // The compiled `Pattern` is the receiver of `matcher(...)`, so flow has to
    // be tracked to that call's qualifier. The `matcher(...)` call expression
    // itself evaluates to a `Matcher`, which the `Pattern` does not flow to.
    DataFlow::localExprFlow(result, this.getPatternMatcherCall().getQualifier())
  }

  /** Gets the regular expression, which is the first argument of the `compile` call. */
  override Expr getRegex() { result = this.getPatternCompileCall().getArgument(0) }

  /** Gets the string being matched, which is the first argument of the `matcher` call. */
  override Expr getString() { result = this.getPatternMatcherCall().getArgument(0) }

  /** Gets the qualifier of this call, that is, the `Matcher` object itself. */
  override Expr getAdditionalSanitizedExpr() {
    // Special case for MatcherMatchesCall. Consider the following code:
    //
    //     Matcher matcher = Pattern.compile(regexp).matcher(taintedInput);
    //     if (matcher.matches()) {
    //        sink(matcher.group(1));
    //     }
    //
    // Even though the string is `taintedInput`, we also want to sanitize
    // `matcher` as it can be used to get substrings of `taintedInput`.
    result = this.getQualifier()
  }

  /** Gets the fixed display name `Matcher.matches`. */
  override string getName() { result = "Matcher.matches" }
}

View File

@@ -427,20 +427,15 @@ private class ReplaceDirectoryCharactersSanitizer extends StringReplaceOrReplace
}
}
/** Holds if `target` is the first argument of `matchesCall`. */
private predicate isMatchesTarget(StringMatchesCall matchesCall, CompileTimeConstantExpr target) {
target = matchesCall.getArgument(0)
}
/**
* Holds if `matchesCall` confirms that `checkedExpr` does not contain any directory characters
* on the given `branch`.
*/
private predicate isMatchesCall(StringMatchesCall matchesCall, Expr checkedExpr, boolean branch) {
private predicate isMatchesCall(RegexMatch regexMatch, Expr checkedExpr, boolean branch) {
exists(CompileTimeConstantExpr target, string targetValue |
isMatchesTarget(matchesCall, target) and
target = regexMatch.getRegex() and
target.getStringValue() = targetValue and
checkedExpr = matchesCall.getQualifier()
checkedExpr = regexMatch.getString()
|
(
// Allow anything except `.`, '/', '\'

View File

@@ -41,24 +41,10 @@ class SimpleTypeSanitizer extends DataFlow::Node {
* make the type recursive. Otherwise use `RegexpCheckBarrier`.
*/
predicate regexpMatchGuardChecks(Guard guard, Expr e, boolean branch) {
exists(Method method, MethodCall mc |
method = mc.getMethod() and
guard = mc and
exists(RegexMatch rm | not rm instanceof Annotation |
guard = rm and
e = rm.getASanitizedExpr() and
branch = true
|
// `String.matches` and other `matches` methods.
method.getName() = "matches" and
e = mc.getQualifier()
or
method instanceof PatternMatchesMethod and
e = mc.getArgument(1)
or
method instanceof MatcherMatchesMethod and
exists(MethodCall matcherCall |
matcherCall.getMethod() instanceof PatternMatcherMethod and
e = matcherCall.getArgument(0) and
DataFlow::localExprFlow(matcherCall, mc.getQualifier())
)
)
}
@@ -70,5 +56,10 @@ predicate regexpMatchGuardChecks(Guard guard, Expr e, boolean branch) {
class RegexpCheckBarrier extends DataFlow::Node {
  RegexpCheckBarrier() {
    // An annotation does not dominate the expression it sanitizes, so it cannot
    // be expressed as a barrier guard. The expressions it constrains are
    // therefore treated as barriers directly.
    this.asExpr() = any(RegexMatch rm | rm instanceof Annotation).getString()
    or
    // All other regex matches act as guards on the expressions they check.
    this = DataFlow::BarrierGuard<regexpMatchGuardChecks/3>::getABarrierNode()
  }
}

View File

@@ -31,11 +31,9 @@ private class ExternalRegexInjectionSanitizer extends RegexInjectionSanitizer {
*/
private class PatternLiteralFlag extends RegexInjectionSanitizer {
PatternLiteralFlag() {
exists(MethodCall ma, Method m, PatternLiteralField field | m = ma.getMethod() |
ma.getArgument(0) = this.asExpr() and
m.getDeclaringType() instanceof TypeRegexPattern and
m.hasName("compile") and
ma.getArgument(1) = field.getAnAccess()
exists(PatternCompileCall pcc, PatternLiteralField field |
pcc.getArgument(0) = this.asExpr() and
pcc.getArgument(1) = field.getAnAccess()
)
}
}