Swift: Support multiple parse mode flags.

This commit is contained in:
Geoffrey White
2023-07-10 18:08:23 +01:00
parent f50345659e
commit 6e80021c4e
4 changed files with 46 additions and 27 deletions

View File

@@ -24,9 +24,9 @@ abstract class RegExp extends Expr {
predicate isIgnoreCase() { this.getAMode() = "IGNORECASE" }
/**
* Gets the flags for this `RegExp`, or the empty string if it has no flags.
* Gets a string representing the flags for this `RegExp`, or the empty string if it has no flags.
*/
string getFlags() { result = concat(string mode | mode = this.getAMode() | mode, " | ")}
string getFlags() { result = concat(string mode | mode = this.getAMode() | mode, " | ") }
/**
* Helper predicate for `charSetStart(int start, int end)`.
@@ -275,27 +275,34 @@ abstract class RegExp extends Expr {
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
/**
* Holds if `start` and `end` are the range of the mode prefix substring (if any) of this
* regular expression, and `c` is a mode prefix character specified in it. For example
* in the following regular expression, `start` is `0`, `end` is `3` and `c` is `i`.
* Holds if a parse mode prefix starts between `start` and `end`. For example:
* ```
* (?i)one|two
* (?i)
* ```
*/
private predicate flagGroupStart(int start, int end, string c) {
// TODO: I believe this fails with multiple mode specifiers such as (?is) at the moment.
private predicate flagGroupStart(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
end = start + 3 and
c = this.getChar(start + 2) and
c in ["i", "m", "s", "u", "x", "U"]
end = start + 2
}
/**
* Holds if a parse mode prefix group is between `start` and `end`, and includes the
* mode flag `c`.
*
* `start` is the index of the opening `(` of the group, as matched by `flagGroupStart`.
* `c` is any single character found at an index in the range `inStart .. inEnd - 1`,
* so a group containing several flag characters yields one result per character.
* For example, the test expectation for `(?is)abc` is the mode set `DOTALL | IGNORECASE`.
*
* No filtering of `c` is performed here; callers such as `getModeFromPrefix` decide
* which characters correspond to known modes.
*/
private predicate flagGroup(int start, int end, string c) {
exists(int inStart, int inEnd |
// `inStart` is where `flagGroupStart` says the group opener ends.
this.flagGroupStart(start, inStart) and
// NOTE(review): `groupContents` is defined elsewhere; presumably it binds `end` to the
// end of the whole group and `inStart`/`inEnd` to its interior range - confirm.
this.groupContents(start, end, inStart, inEnd) and
// `c` ranges over every character at positions `inStart` to `inEnd - 1` inclusive.
this.getChar([inStart .. inEnd - 1]) = c
)
}
/**
* Gets a mode of this regular expression string if it is defined by a mode prefix.
*/
string getModeFromPrefix() {
exists(string c | this.flagGroupStart(_, _, c) |
exists(string c | this.flagGroup(_, _, c) |
// TODO: are these correct in Swift?
c = "i" and result = "IGNORECASE"
or
@@ -322,10 +329,13 @@ abstract class RegExp extends Expr {
* UNICODECLASS
*/
string getAMode() {
/* TODO
result != "None" and
usedAsRegex(this, result, _)
or*/
/*
* TODO
* result != "None" and
* usedAsRegex(this, result, _)
* or
*/
result = this.getModeFromPrefix()
}
@@ -709,7 +719,7 @@ abstract class RegExp extends Expr {
or
this.simpleGroupStart(start, end)
or
this.flagGroupStart(start, end, _)
this.flagGroupStart(start, end)
}
/** Matches the start of a non-capturing group, e.g. `(?:` */

View File

@@ -1618,13 +1618,16 @@ redos_variants.swift:
# 142| [RegExpConstant, RegExpNormalChar] !
# 146| [RegExpZeroWidthMatch] (?s)
# 146| [RegExpGroup] (?s)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] s
# 146| [RegExpSequence] (?s)(.|\n)*!
#-----| 0 -> [RegExpZeroWidthMatch] (?s)
#-----| 0 -> [RegExpGroup] (?s)
#-----| 1 -> [RegExpStar] (.|\n)*
#-----| 2 -> [RegExpConstant, RegExpNormalChar] !
# 146| [RegExpConstant, RegExpNormalChar] s
# 146| [RegExpGroup] (.|\n)
#-----| 0 -> [RegExpAlt] .|\n
@@ -6489,30 +6492,36 @@ regex.swift:
# 205| [RegExpNamedCharacterProperty] [:aaaaa:]
# 209| [RegExpZeroWidthMatch] (?i)
# 209| [RegExpGroup] (?i)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i
# 209| [RegExpSequence] (?i)abc
#-----| 0 -> [RegExpZeroWidthMatch] (?i)
#-----| 0 -> [RegExpGroup] (?i)
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
# 209| [RegExpConstant, RegExpNormalChar] i
# 209| [RegExpConstant, RegExpNormalChar] abc
# 210| [RegExpZeroWidthMatch] (?s)
# 210| [RegExpGroup] (?s)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] s
# 210| [RegExpSequence] (?s)abc
#-----| 0 -> [RegExpZeroWidthMatch] (?s)
#-----| 0 -> [RegExpGroup] (?s)
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
# 210| [RegExpConstant, RegExpNormalChar] s
# 210| [RegExpConstant, RegExpNormalChar] abc
# 211| [RegExpGroup] (?is)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] s
#-----| 0 -> [RegExpConstant, RegExpNormalChar] is
# 211| [RegExpSequence] (?is)abc
#-----| 0 -> [RegExpGroup] (?is)
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
# 211| [RegExpConstant, RegExpNormalChar] s
# 211| [RegExpConstant, RegExpNormalChar] is
# 211| [RegExpConstant, RegExpNormalChar] abc

View File

@@ -34,7 +34,7 @@ module RegexTest implements TestSig {
location = eval.getLocation() and
element = eval.toString() and
tag = "modes" and
value = regex.getFlags() and
value = quote(regex.getFlags()) and
value != ""
)
}

View File

@@ -208,7 +208,7 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws {
_ = try Regex("(?i)abc").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=(?i)abc
_ = try Regex("(?s)abc").firstMatch(in: input) // $ input=input modes=DOTALL regex=(?s)abc
_ = try Regex("(?is)abc").firstMatch(in: input) // $ input=input regex=(?is)abc MISSING: modes="DOTALL | IGNORECASE" SPURIOUS: modes=IGNORECASE
_ = try Regex("(?is)abc").firstMatch(in: input) // $ input=input modes="DOTALL | IGNORECASE" regex=(?is)abc
_ = try Regex("abc").dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc MISSING: modes=DOTALL
_ = try Regex("abc").dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc