Extend RegexMatch framework to allow for MatcherMatchesCall edge case

This commit is contained in:
Owen Mansel-Chan
2026-02-15 14:37:16 +00:00
parent 8f8f4c2d52
commit d6b71a346e
3 changed files with 22 additions and 16 deletions

View File

@@ -33,6 +33,9 @@ class RegexMatch extends Expr instanceof RegexMatch::Range {
/**
 * Gets an expression for the string to be searched or matched against.
 * Delegates to the underlying `RegexMatch::Range` implementation.
 */
Expr getString() { result = super.getString() }
/**
 * Gets an expression that is sanitized by this regex match: either the
 * string being searched or matched against, or an additional expression
 * supplied by the underlying `RegexMatch::Range`.
 */
Expr getASanitizedExpr() {
  result = this.getString()
  or
  result = super.getAdditionalSanitizedExpr()
}
/**
* Gets the name of this regex match, typically the name of an executing
* method. This is used for nice alert messages and should include the
@@ -59,6 +62,9 @@ module RegexMatch {
/**
 * Gets an expression for the string to be searched or matched against.
 * Abstract: each concrete regex match must provide an implementation.
 */
abstract Expr getString();
/**
 * Gets an additional expression to be sanitized, if any.
 *
 * This default implementation has no result; override it for matches where
 * an expression other than `getString()` should also be sanitized.
 */
Expr getAdditionalSanitizedExpr() { none() }
/**
* Gets the name of this regex match, typically the name of an executing
* method. This is used for nice alert messages and should include the

View File

@@ -118,5 +118,18 @@ class MatcherMatchesCall extends MethodCall, RegexMatch::Range {
// The matched string is argument 0 of the call returned by `getPatternMatcherCall()`.
override Expr getString() { result = this.getPatternMatcherCall().getArgument(0) }
/**
 * Gets the qualifier of this call (the `matcher` object in the example
 * below) as an additional expression to be sanitized.
 */
override Expr getAdditionalSanitizedExpr() {
// Special case for MatcherMatchesCall. Consider the following code:
//
// Matcher matcher = Pattern.compile(regexp).matcher(taintedInput);
// if (matcher.matches()) {
// sink(matcher.group(1));
// }
//
// Even though the string being matched is `taintedInput`, we also want to
// sanitize `matcher`, as it can be used to get substrings of `taintedInput`.
result = this.getQualifier()
}
// Name of this regex match, used in alert messages.
override string getName() { result = "Matcher.matches" }
}

View File

@@ -43,22 +43,9 @@ class SimpleTypeSanitizer extends DataFlow::Node {
// Holds if `guard` is a `RegexMatch` that is not an `Annotation`, `e` is an
// expression sanitized by that match, and `branch` is `true`.
predicate regexpMatchGuardChecks(Guard guard, Expr e, boolean branch) {
exists(RegexMatch rm | not rm instanceof Annotation |
guard = rm and
(
e = rm.getString()
or
// Special case for MatcherMatchesCall. Consider the following code:
//
// Matcher matcher = Pattern.compile(regexp).matcher(taintedInput);
// if (matcher.matches()) {
// sink(matcher.group(1));
// }
//
// Even though the string is `taintedInput`, we also want to sanitize
// `matcher` as it can be used to get substrings of `taintedInput`.
e = rm.(MatcherMatchesCall).getQualifier()
)
) and
branch = true
e = rm.getASanitizedExpr() and
branch = true
)
}
/**