Merge pull request #13778 from geoffw0/javaparsemode

Java: Understand multiple parse mode flags specified in a regular expression string
This commit is contained in:
yoff
2023-09-18 14:22:59 +02:00
committed by GitHub
5 changed files with 80 additions and 9 deletions

View File

@@ -472,12 +472,48 @@ abstract class RegexString extends StringLiteral {
)
}
private predicate flagGroupStart(int start, int end, string c) {
/**
* Holds if the initial part of a parse mode group, not containing any
* mode characters, is between `start` and `end`.
*/
private predicate flagGroupStartNoModes(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
end = start + 3 and
c = this.getChar(start + 2) and
c in ["i", "m", "s", "u", "x", "U"]
this.getChar(start + 2) in ["i", "m", "s", "u", "x", "U"] and
end = start + 2
}
/**
* Holds if `pos` contains a mode character from the
* flag group starting at `start`.
*
* For example, for a flag group `(?is)` beginning at `start`, this holds
* for the positions of both `i` and `s`.
*/
private predicate modeCharacter(int start, int pos) {
// Base case: the first mode character, found where the mode-free prefix ends.
this.flagGroupStartNoModes(start, pos)
or
// Recursive case: a mode character directly after another mode character
// that belongs to the same flag group.
this.modeCharacter(start, pos - 1) and
this.getChar(pos) in ["i", "m", "s", "u", "x", "U"]
}
/**
* Holds if a parse mode group is between `start` and `end`.
*
* `end` is one past the position of the last consecutive mode character,
* so every flag in a group such as `(?is)` falls within the range.
*/
private predicate flagGroupStart(int start, int end) {
this.flagGroupStartNoModes(start, _) and
// Take the greatest mode-character position for this group, plus one.
end = max(int i | this.modeCharacter(start, i) | i + 1)
}
/**
* Holds if a parse mode group of this regex includes the mode flag `c`.
* For example the following parse mode group, with mode flag `i`:
* ```
* (?i)
* ```
* The start of the group is not constrained, so a flag from any parse mode
* group in the regex is reported; a group with several mode characters
* yields one result per character.
*/
private predicate flag(string c) {
exists(int pos |
// `_`: the mode character may belong to a flag group starting anywhere.
this.modeCharacter(_, pos) and
this.getChar(pos) = c
)
}
/**
@@ -485,7 +521,7 @@ abstract class RegexString extends StringLiteral {
* it is defined by a prefix.
*/
string getModeFromPrefix() {
exists(string c | this.flagGroupStart(_, _, c) |
exists(string c | this.flag(c) |
c = "i" and result = "IGNORECASE"
or
c = "m" and result = "MULTILINE"
@@ -540,7 +576,7 @@ abstract class RegexString extends StringLiteral {
private predicate groupStart(int start, int end) {
this.nonCapturingGroupStart(start, end)
or
this.flagGroupStart(start, end, _)
this.flagGroupStart(start, end)
or
this.namedGroupStart(start, end)
or