Swift: Port regex mode flag character fix from Python.

This commit is contained in:
Geoffrey White
2023-09-12 22:18:17 +01:00
parent df60f560a2
commit 200d9a4dfb
5 changed files with 47 additions and 46 deletions

View File

@@ -277,9 +277,10 @@ abstract class RegExp extends Expr {
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
/**
* Holds if a parse mode starts between `start` and `end`.
* Holds if the initial part of a parse mode, not containing any
* mode characters, is between `start` and `end`.
*/
private predicate flagGroupStart(int start, int end) {
private predicate flagGroupStartNoModes(int start, int end) {
this.isGroupStart(start) and
this.getChar(start + 1) = "?" and
this.getChar(start + 2) in ["i", "x", "s", "m", "w"] and
@@ -287,17 +288,35 @@ abstract class RegExp extends Expr {
}
/**
* Holds if a parse mode group is between `start` and `end`, and includes the
* mode flag `c`. For example the following span, with mode flag `i`:
* Holds if `pos` contains a mode character from the
* flag group starting at `start`.
*/
private predicate modeCharacter(int start, int pos) {
// Base case: `pos` is the end position that `flagGroupStartNoModes`
// reports for the flag group beginning at `start`.
this.flagGroupStartNoModes(start, pos)
or
// Recursive case: the immediately preceding position already satisfies
// this predicate for the same `start`, and the character at `pos` is
// itself one of the recognised mode letters. The chain therefore ends at
// the first character that is not in the list below.
this.modeCharacter(start, pos - 1) and
this.getChar(pos) in ["i", "x", "s", "m", "w"]
}
/**
* Holds if a parse mode group is between `start` and `end`.
*/
private predicate flagGroupStart(int start, int end) {
// Restrict `start` to positions that begin a flag group; the end position
// reported by `flagGroupStartNoModes` is deliberately ignored here.
this.flagGroupStartNoModes(start, _) and
// `end` is one past the greatest position for which `modeCharacter`
// holds with this `start`.
end = max(int i | this.modeCharacter(start, i) | i + 1)
}
/**
* Holds if a parse mode group of this regex includes the mode flag `c`.
* For example the following parse mode group, with mode flag `i`:
* ```
* (?i)
* ```
*/
private predicate flagGroup(int start, int end, string c) {
exists(int inStart, int inEnd |
this.flagGroupStart(start, inStart) and
this.groupContents(start, end, inStart, inEnd) and
this.getChar([inStart .. inEnd - 1]) = c
private predicate flag(string c) {
exists(int pos |
this.modeCharacter(_, pos) and
this.getChar(pos) = c
)
}
@@ -305,7 +324,7 @@ abstract class RegExp extends Expr {
* Gets a mode of this regular expression string if it is defined by a mode prefix.
*/
string getModeFromPrefix() {
exists(string c | this.flagGroup(_, _, c) |
exists(string c | this.flag(c) |
c = "i" and result = "IGNORECASE" // case insensitive
or
c = "x" and result = "VERBOSE" // ignores whitespace and `#` comments within patterns

View File

@@ -1618,16 +1618,13 @@ redos_variants.swift:
# 142| [RegExpConstant, RegExpNormalChar] !
# 146| [RegExpGroup] (?s)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] s
# 146| [RegExpZeroWidthMatch] (?s)
# 146| [RegExpSequence] (?s)(.|\n)*!
#-----| 0 -> [RegExpGroup] (?s)
#-----| 0 -> [RegExpZeroWidthMatch] (?s)
#-----| 1 -> [RegExpStar] (.|\n)*
#-----| 2 -> [RegExpConstant, RegExpNormalChar] !
# 146| [RegExpConstant, RegExpNormalChar] s
# 146| [RegExpGroup] (.|\n)
#-----| 0 -> [RegExpAlt] .|\n
@@ -6492,47 +6489,38 @@ regex.swift:
# 206| [RegExpNamedCharacterProperty] [:aaaaa:]
# 211| [RegExpGroup] (?i)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i
# 211| [RegExpZeroWidthMatch] (?i)
# 211| [RegExpSequence] (?i)abc
#-----| 0 -> [RegExpGroup] (?i)
#-----| 0 -> [RegExpZeroWidthMatch] (?i)
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
# 211| [RegExpConstant, RegExpNormalChar] i
# 211| [RegExpConstant, RegExpNormalChar] abc
# 212| [RegExpGroup] (?s)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] s
# 212| [RegExpZeroWidthMatch] (?s)
# 212| [RegExpSequence] (?s)abc
#-----| 0 -> [RegExpGroup] (?s)
#-----| 0 -> [RegExpZeroWidthMatch] (?s)
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
# 212| [RegExpConstant, RegExpNormalChar] s
# 212| [RegExpConstant, RegExpNormalChar] abc
# 213| [RegExpGroup] (?is)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] is
# 213| [RegExpZeroWidthMatch] (?is)
# 213| [RegExpSequence] (?is)abc
#-----| 0 -> [RegExpGroup] (?is)
#-----| 0 -> [RegExpZeroWidthMatch] (?is)
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
# 213| [RegExpConstant, RegExpNormalChar] is
# 213| [RegExpConstant, RegExpNormalChar] abc
# 214| [RegExpGroup] (?i-s)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i-s
#-----| 0 -> [RegExpConstant, RegExpNormalChar] -s
# 214| [RegExpSequence] (?i-s)abc
#-----| 0 -> [RegExpGroup] (?i-s)
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
# 214| [RegExpConstant, RegExpNormalChar] i-s
# 214| [RegExpConstant, RegExpNormalChar] -s
# 214| [RegExpConstant, RegExpNormalChar] abc
@@ -6540,13 +6528,10 @@ regex.swift:
# 217| [RegExpSequence] abc(?i)def
#-----| 0 -> [RegExpConstant, RegExpNormalChar] abc
#-----| 1 -> [RegExpGroup] (?i)
#-----| 1 -> [RegExpZeroWidthMatch] (?i)
#-----| 2 -> [RegExpConstant, RegExpNormalChar] def
# 217| [RegExpGroup] (?i)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i
# 217| [RegExpConstant, RegExpNormalChar] i
# 217| [RegExpZeroWidthMatch] (?i)
# 217| [RegExpConstant, RegExpNormalChar] def
@@ -6558,16 +6543,13 @@ regex.swift:
#-----| 2 -> [RegExpConstant, RegExpNormalChar] ghi
# 218| [RegExpGroup] (?i:def)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i:def
#-----| 0 -> [RegExpConstant, RegExpNormalChar] :def
# 218| [RegExpConstant, RegExpNormalChar] i:def
# 218| [RegExpConstant, RegExpNormalChar] :def
# 218| [RegExpConstant, RegExpNormalChar] ghi
# 219| [RegExpGroup] (?i)
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i
# 219| [RegExpConstant, RegExpNormalChar] i
# 219| [RegExpZeroWidthMatch] (?i)
# 219| [RegExpConstant, RegExpNormalChar] abc

View File

@@ -211,7 +211,7 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws {
_ = try Regex("(?i)abc").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=(?i)abc
_ = try Regex("(?s)abc").firstMatch(in: input) // $ input=input modes=DOTALL regex=(?s)abc
_ = try Regex("(?is)abc").firstMatch(in: input) // $ input=input modes="DOTALL | IGNORECASE" regex=(?is)abc
_ = try Regex("(?i-s)abc").firstMatch(in: input) // $ input=input regex=(?i-s)abc MISSING: modes=IGNORECASE SPURIOUS: modes="DOTALL | IGNORECASE"
_ = try Regex("(?i-s)abc").firstMatch(in: input) // $ input=input regex=(?i-s)abc modes=IGNORECASE
// these cases use parse modes on localized areas of the regex, which we don't currently support
_ = try Regex("abc(?i)def").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=abc(?i)def

View File

@@ -2,7 +2,7 @@
| ReDoS.swift:65:22:65:22 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| ReDoS.swift:66:22:66:22 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| ReDoS.swift:69:18:69:18 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| ReDoS.swift:73:26:73:33 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'isx' and containing many repetitions of '\\n'. |
| ReDoS.swift:73:26:73:33 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'x' and containing many repetitions of '\\n'. |
| ReDoS.swift:77:46:77:46 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| ReDoS.swift:79:57:79:57 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
| ReDoS.swift:82:57:82:57 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

View File

@@ -70,7 +70,7 @@ func myRegexpTests(myUrl: URL) throws {
let regex = try Regex(str)
_ = try regex.firstMatch(in: tainted)
_ = try Regex(#"(?is)X(?:.|\n)*Y"#) // BAD - suggested attack should begin with 'x' or 'X', *not* 'isx' or 'isX' [WRONG]
_ = try Regex(#"(?is)X(?:.|\n)*Y"#) // BAD - suggested attack should begin with 'x' or 'X', *not* 'isx' or 'isX'
// NSRegularExpression