Swift: Expand parse mode support to include NSRegularExpression options.

This commit is contained in:
Geoffrey White
2023-07-18 13:27:36 +01:00
parent cd1e73bd65
commit cf7311f3f1
3 changed files with 119 additions and 28 deletions

View File

@@ -36,22 +36,23 @@ abstract class RegexCreation extends DataFlow::Node {
* created from.
*/
abstract DataFlow::Node getStringInput();
/**
* Gets a dataflow node for the options input that might contain parse mode
* flags (if any).
*/
DataFlow::Node getOptionsInput() { none() }
}
/**
* A data-flow node where a `Regex` or `NSRegularExpression` object is created.
* A data-flow node where a `Regex` object is created.
*/
private class StandardRegexCreation extends RegexCreation {
private class RegexRegexCreation extends RegexCreation {
DataFlow::Node input;
StandardRegexCreation() {
RegexRegexCreation() {
exists(CallExpr call |
(
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) or
call.getStaticTarget()
.(Method)
.hasQualifiedName("NSRegularExpression", "init(pattern:options:)")
) and
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) and
input.asExpr() = call.getArgument(0).getExpr() and
this.asExpr() = call
)
@@ -60,6 +61,29 @@ private class StandardRegexCreation extends RegexCreation {
override DataFlow::Node getStringInput() { result = input }
}
/**
 * A data-flow node where an `NSRegularExpression` object is created, that is,
 * a call to `NSRegularExpression.init(pattern:options:)`.
 */
private class NSRegularExpressionRegexCreation extends RegexCreation {
  /** The data-flow node for the first argument (the pattern) of the call. */
  DataFlow::Node patternInput;

  NSRegularExpressionRegexCreation() {
    exists(CallExpr initCall, Method target |
      // this node is the initializer call itself
      this.asExpr() = initCall and
      target = initCall.getStaticTarget() and
      target.hasQualifiedName("NSRegularExpression", "init(pattern:options:)") and
      // argument 0 is `pattern:`
      patternInput.asExpr() = initCall.getArgument(0).getExpr()
    )
  }

  /** Gets the node for the `pattern:` argument the regular expression is built from. */
  override DataFlow::Node getStringInput() { result = patternInput }

  /** Gets the node for the `options:` argument (argument 1) of the initializer call. */
  override DataFlow::Node getOptionsInput() {
    exists(CallExpr initCall |
      initCall = this.asExpr() and
      result.asExpr() = initCall.getArgument(1).getExpr()
    )
  }
}
newtype TRegexParseMode =
MkIgnoreCase() or // case insensitive
MkVerbose() or // ignores whitespace and `#` comments within patterns
@@ -94,25 +118,29 @@ class RegexAdditionalFlowStep extends Unit {
abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
/**
* Holds if the step from `node1` to `node2` either sets (`isSet` = true)
* or unsets (`isSet` = false) parse mode `mode` for the regular expression.
* Holds if a regular expression parse mode is either set (`isSet` = true)
* or unset (`isSet` = false) at `node`. Parse modes propagate through
* array construction and regex construction.
*/
abstract predicate modifiesParseMode(
DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet
);
abstract predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet);
}
/**
* An additional flow step for `Regex` or `NSRegularExpression`.
* An additional flow step for `Regex`.
*/
class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
this.modifiesParseMode(nodeFrom, nodeTo, _, _)
this.setsParseModeEdge(nodeFrom, nodeTo, _, _)
}
override predicate modifiesParseMode(
override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
this.setsParseModeEdge(_, node, mode, isSet)
}
private predicate setsParseModeEdge(
DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet
) {
// `Regex` methods that modify parse mode
exists(CallExpr ce |
nodeFrom.asExpr() = ce.getQualifier() and
nodeTo.asExpr() = ce and
@@ -135,6 +163,56 @@ class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
}
}
/**
 * An additional flow step for `NSRegularExpression`.
 *
 * This class adds no flow edges of its own (`step` never holds). It only
 * marks references to members of `NSRegularExpression.Options` as places
 * where a parse mode is set.
 *
 * NOTE(review): despite the `Standard` prefix, the body only recognizes
 * `NSRegularExpression.Options` members - consider whether the class name
 * should say `NSRegularExpression` instead.
 */
class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() }

  /**
   * Holds if `node` is a reference to an `NSRegularExpression.Options` member
   * that corresponds to parse mode `mode`. Options only ever switch a mode on,
   * so `isSet` is always `true`.
   */
  override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
    isSet = true and
    exists(string optionName |
      // `NSRegularExpression.Options` values
      node.asExpr()
          .(MemberRefExpr)
          .getMember()
          .(FieldDecl)
          .hasQualifiedName("NSRegularExpression.Options", optionName)
    |
      optionName = "caseInsensitive" and mode = MkIgnoreCase()
      or
      optionName = "allowCommentsAndWhitespace" and mode = MkVerbose()
      or
      optionName = "dotMatchesLineSeparators" and mode = MkDotAll()
      or
      optionName = "anchorsMatchLines" and mode = MkMultiLine()
      or
      optionName = "useUnicodeWordBoundaries" and mode = MkUnicode()
    )
  }
}
/**
* A call that evaluates a regular expression. For example, the call to `firstMatch` in:
* ```
@@ -174,7 +252,7 @@ abstract class RegexEval extends CallExpr {
RegexParseMode getAParseMode() {
exists(DataFlow::Node setNode |
// parse mode flag is set
any(RegexAdditionalFlowStep s).modifiesParseMode(_, setNode, result, true) and
any(RegexAdditionalFlowStep s).setsParseMode(setNode, result, true) and
// reaches this eval
RegexParseModeFlow::flow(setNode, DataFlow::exprNode(this.getRegexInput()))
)

View File

@@ -53,15 +53,15 @@ module RegexUseFlow = DataFlow::Global<RegexUseConfig>;
/**
* A data flow configuration for tracking regular expression parse mode
* flags from the point they are set to the point of use. The flow state
* encodes which parse mode flag was set.
* flags from wherever they are created or set through to regular expression
* evaluation. The flow state encodes which parse mode flag was set.
*/
private module RegexParseModeConfig implements DataFlow::StateConfigSig {
class FlowState = RegexParseMode;
predicate isSource(DataFlow::Node node, FlowState flowstate) {
// parse mode flag is set
any(RegexAdditionalFlowStep s).modifiesParseMode(_, node, flowstate, true)
any(RegexAdditionalFlowStep s).setsParseMode(node, flowstate, true)
}
predicate isSink(DataFlow::Node node, FlowState flowstate) {
@@ -73,11 +73,24 @@ private module RegexParseModeConfig implements DataFlow::StateConfigSig {
predicate isBarrier(DataFlow::Node node) { none() }
predicate isBarrier(DataFlow::Node node, FlowState flowstate) {
// parse mode flag is set or unset
any(RegexAdditionalFlowStep s).modifiesParseMode(node, _, flowstate, _)
// parse mode flag is unset
any(RegexAdditionalFlowStep s).setsParseMode(node, flowstate, false)
}
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// flow through array construction
exists(ArrayExpr arr |
nodeFrom.asExpr() = arr.getAnElement() and
nodeTo.asExpr() = arr
)
or
// flow through regex creation
exists(RegexCreation create |
nodeFrom = create.getOptionsInput() and
nodeTo = create
)
or
// additional flow steps for regular expression objects
any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo)
}

View File

@@ -227,12 +227,12 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws {
_ = try Regex("abc").anchorsMatchLineEndings().firstMatch(in: input) // $ input=input regex=abc modes=MULTILINE
// parse modes set through NSRegularExpression
_ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE
_ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL
_ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE"
_ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes=IGNORECASE
_ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes=DOTALL
_ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes="DOTALL | IGNORECASE"
let myOptions1 : NSRegularExpression.Options = [.caseInsensitive, .dotMatchesLineSeparators]
_ = try NSRegularExpression(pattern: ".*", options: myOptions1).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE"
_ = try NSRegularExpression(pattern: ".*", options: myOptions1).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes="DOTALL | IGNORECASE"
// parse modes set through other methods