mirror of
https://github.com/github/codeql.git
synced 2026-02-26 11:53:42 +01:00
Java: Improve Regex flag parsing
Fixes: - Flag `d` not being recognized - Syntax for disabling flags (`-`) not being recognized - Non-capturing group with flags erroneously containing `:` as literal
This commit is contained in:
@@ -479,7 +479,7 @@ abstract class RegexString extends StringLiteral {
|
||||
private predicate flagGroupStartNoModes(int start, int end) {
|
||||
this.isGroupStart(start) and
|
||||
this.getChar(start + 1) = "?" and
|
||||
this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and
|
||||
this.getChar(start + 2) in ["-", "i", "d", "m", "s", "u", "x", "U"] and
|
||||
end = start + 2
|
||||
}
|
||||
|
||||
@@ -491,7 +491,7 @@ abstract class RegexString extends StringLiteral {
|
||||
this.flagGroupStartNoModes(start, pos)
|
||||
or
|
||||
this.modeCharacter(start, pos - 1) and
|
||||
this.getChar(pos) in ["i", "m", "s", "u", "x", "U"]
|
||||
this.getChar(pos) in ["-", "i", "d", "m", "s", "u", "x", "U"]
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -499,7 +499,10 @@ abstract class RegexString extends StringLiteral {
|
||||
*/
|
||||
private predicate flagGroupStart(int start, int end) {
|
||||
this.flagGroupStartNoModes(start, _) and
|
||||
end = max(int i | this.modeCharacter(start, i) | i + 1)
|
||||
// Check if this is a non-capturing group with flags, and therefore the `:` should be excluded
|
||||
exists(int maybeEnd | maybeEnd = max(int i | this.modeCharacter(start, i) | i + 1) |
|
||||
if this.getChar(maybeEnd) = ":" then end = maybeEnd + 1 else end = maybeEnd
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -510,9 +513,15 @@ abstract class RegexString extends StringLiteral {
|
||||
* ```
|
||||
*/
|
||||
private predicate flag(string c) {
|
||||
exists(int pos |
|
||||
this.modeCharacter(_, pos) and
|
||||
this.getChar(pos) = c
|
||||
exists(int start, int pos |
|
||||
this.modeCharacter(start, pos) and
|
||||
this.getChar(pos) = c and
|
||||
// Ignore if flag is disabled; use `<=` to also exclude `-` itself
|
||||
// This does not properly handle the (contrived) case where a flag is both enabled and
|
||||
// disabled, e.g. `(?i-i)a+`, in which case the flag seems to act as if it was disabled
|
||||
not exists(int minusPos |
|
||||
this.modeCharacter(start, minusPos) and this.getChar(minusPos) = "-" and minusPos <= pos
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -524,6 +533,8 @@ abstract class RegexString extends StringLiteral {
|
||||
exists(string c | this.flag(c) |
|
||||
c = "i" and result = "IGNORECASE"
|
||||
or
|
||||
c = "d" and result = "UNIXLINES"
|
||||
or
|
||||
c = "m" and result = "MULTILINE"
|
||||
or
|
||||
c = "s" and result = "DOTALL"
|
||||
@@ -930,13 +941,13 @@ class Regex extends RegexString {
|
||||
|
||||
/**
|
||||
* Gets a mode (if any) of this regular expression. Can be any of:
|
||||
* DEBUG
|
||||
* IGNORECASE
|
||||
* MULTILINE
|
||||
* DOTALL
|
||||
* UNICODE
|
||||
* VERBOSE
|
||||
* UNICODECLASS
|
||||
* - IGNORECASE
|
||||
* - UNIXLINES
|
||||
* - MULTILINE
|
||||
* - DOTALL
|
||||
* - UNICODE
|
||||
* - VERBOSE
|
||||
* - UNICODECLASS
|
||||
*/
|
||||
string getAMode() {
|
||||
result != "None" and
|
||||
@@ -946,7 +957,7 @@ class Regex extends RegexString {
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if this regex is used to match against a full string,
|
||||
* Holds if this regex is used to match against a full string,
|
||||
* as though it was implicitly surrounded by ^ and $.
|
||||
*/
|
||||
predicate matchesFullString() { matches_full_string = true }
|
||||
|
||||
Reference in New Issue
Block a user