Swift: Properly identify strings that are used in regular expressions.

This commit is contained in:
Geoffrey White
2023-06-12 13:38:37 +01:00
parent 712c3cc698
commit 2ccbdbdf87
4 changed files with 93 additions and 91 deletions

View File

@@ -4,65 +4,53 @@
import swift
import codeql.swift.dataflow.DataFlow
import codeql.swift.regex.RegexTreeView // re-export
private import internal.ParseRegex
//private import codeql.regex.internal.RegExpTracking as RegExpTracking
/**
* A node whose value may flow to a position where it is interpreted
* as a part of a regular expression.
* A data flow configuration for tracking string literals that are used as
* regular expressions.
*/
abstract class RegExpPatternSource extends DataFlow::Node {
/**
* Gets a node where the pattern of this node is parsed as a part of
* a regular expression.
*/
abstract DataFlow::Node getAParse();
private module RegexUseConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node node) { node.asExpr() instanceof StringLiteralExpr }
/**
* Gets the root term of the regular expression parsed from this pattern.
*/
abstract RegExpTerm getRegExpTerm();
predicate isSink(DataFlow::Node node) { node.asExpr() = any(RegexEval eval).getRegexInput() }
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
// flow through `Regex` initializer, i.e. from a string to a `Regex` object.
exists(CallExpr call |
(
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) or
call.getStaticTarget()
.(Method)
.hasQualifiedName("NSRegularExpression", "init(pattern:options:)")
) and
nodeFrom.asExpr() = call.getArgument(0).getExpr() and
nodeTo.asExpr() = call
)
}
}
/* *
* A node whose string value may flow to a position where it is interpreted
* as a part of a regular expression.
*
private class StringRegExpPatternSource extends RegExpPatternSource {
private DataFlow::Node parse;
StringRegExpPatternSource() {
this = regExpSource(parse) and
// `regExpSource()` tracks both strings and regex literals, narrow it down to strings.
this.asExpr().getConstantValue().isString(_)
}
override DataFlow::Node getAParse() { result = parse }
override RegExpTerm getRegExpTerm() { result.getRegExp() = this.asExpr().getExpr() }
}*/
private module RegexUseFlow = DataFlow::Global<RegexUseConfig>;
/**
* TODO
* "(a|b).*"
* A string literal that is used as a regular expression in a regular
* expression evaluation. For example the string literal `"(a|b).*"` in:
* ```
* Regex("(a|b).*").firstMatch(in: myString)
* ```
*/
private class ParsedStringRegExp extends RegExp, StringLiteralExpr {
private DataFlow::Node parse;
private class ParsedStringRegex extends RegExp, StringLiteralExpr {
RegexEval eval;
ParsedStringRegExp() {
//this = regExpSource(parse).asExpr().getExpr()
parse.asExpr() = this
ParsedStringRegex() {
RegexUseFlow::flow(DataFlow::exprNode(this), DataFlow::exprNode(eval.getRegexInput()))
}
DataFlow::Node getAParse() { result = parse }
/*
override predicate isDotAll() { none() }
override predicate isIgnoreCase() { none() }
override string getFlags() { none() }*/
/**
* Gets a call that evaluates this regular expression.
*/
RegexEval getEval() { result = eval }
}
/**
@@ -72,18 +60,23 @@ private class ParsedStringRegExp extends RegExp, StringLiteralExpr {
* ```
*/
abstract class RegexEval extends CallExpr {
Expr regex;
Expr input;
Expr regexInput;
Expr stringInput;
/**
* Gets the regular expression that is evaluated.
* Gets the input to this call that is the regular expression.
*/
Expr getRegex() { result = regex }
Expr getRegexInput() { result = regexInput }
/**
* Gets the input string the regular expression is evaluated on.
* Gets the input to this call that is the string the regular expression is evaluated on.
*/
Expr getInput() { result = input }
Expr getStringInput() { result = stringInput }
/**
* Gets a regular expression value that is evaluated here (if any can be identified).
*/
RegExp getARegex() { exists(ParsedStringRegex regex | regex.getEval() = this and result = regex) }
}
/**
@@ -94,8 +87,8 @@ private class AlwaysRegexEval extends RegexEval {
this.getStaticTarget()
.(Method)
.hasQualifiedName("Regex", ["firstMatch(in:)", "prefixMatch(in:)", "wholeMatch(in:)"]) and
regex = this.getQualifier() and
input = this.getArgument(0).getExpr()
regexInput = this.getQualifier() and
stringInput = this.getArgument(0).getExpr()
or
this.getStaticTarget()
.(Method)
@@ -107,8 +100,8 @@ private class AlwaysRegexEval extends RegexEval {
"replaceMatches(in:options:range:withTemplate:)",
"stringByReplacingMatches(in:options:range:withTemplate:)"
]) and
regex = this.getQualifier() and
input = this.getArgument(0).getExpr()
regexInput = this.getQualifier() and
stringInput = this.getArgument(0).getExpr()
or
this.getStaticTarget()
.(Method)
@@ -119,8 +112,8 @@ private class AlwaysRegexEval extends RegexEval {
"split(separator:maxSplits:omittingEmptySubsequences:)", "starts(with:)",
"trimmingPrefix(_:)", "wholeMatch(of:)"
]) and
regex = this.getArgument(0).getExpr() and
input = this.getQualifier()
regexInput = this.getArgument(0).getExpr() and
stringInput = this.getQualifier()
or
this.getStaticTarget()
.(Method)
@@ -131,7 +124,7 @@ private class AlwaysRegexEval extends RegexEval {
"replacing(_:with:maxReplacements:)", "replacing(_:with:subrange:maxReplacements:)",
"trimPrefix(_:)"
]) and
regex = this.getArgument(0).getExpr() and
input = this.getQualifier()
regexInput = this.getArgument(0).getExpr() and
stringInput = this.getQualifier()
}
}

View File

@@ -35,20 +35,20 @@ func myRegexpVariantsTests(myUrl: URL) throws {
let tainted = String(contentsOf: myUrl) // tainted
let untainted = "abcdef"
_ = try Regex(".*").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
_ = try Regex(".*").firstMatch(in: tainted) // $ regex=.* input=tainted
_ = try Regex("a*b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
_ = try Regex("(a*)b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
_ = try Regex("(a)*b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
_ = try Regex("(a*)*b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted redos-vulnerable=
_ = try Regex("((a*)*b)").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted redos-vulnerable=
_ = try Regex("a*b").firstMatch(in: tainted) // $ regex=a*b input=tainted
_ = try Regex("(a*)b").firstMatch(in: tainted) // $ regex=(a*)b input=tainted
_ = try Regex("(a)*b").firstMatch(in: tainted) // $ regex=(a)*b input=tainted
_ = try Regex("(a*)*b").firstMatch(in: tainted) // $ regex=(a*)*b input=tainted redos-vulnerable=
_ = try Regex("((a*)*b)").firstMatch(in: tainted) // $ regex=((a*)*b) input=tainted redos-vulnerable=
_ = try Regex("(a|aa?)b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
_ = try Regex("(a|aa?)*b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted redos-vulnerable=
_ = try Regex("(a|aa?)b").firstMatch(in: tainted) // $ regex=(a|aa?)b input=tainted
_ = try Regex("(a|aa?)*b").firstMatch(in: tainted) // $ regex=(a|aa?)*b input=tainted redos-vulnerable=
// from the qhelp:
_ = try Regex("^_(__|.)+_$").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted redos-vulnerable=
_ = try Regex("^_(__|[^_])+_$").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
_ = try Regex("^_(__|.)+_$").firstMatch(in: tainted) // $ regex=^_(__|.)+_$ input=tainted redos-vulnerable=
_ = try Regex("^_(__|[^_])+_$").firstMatch(in: tainted) // $ regex=^_(__|[^_])+_$ input=tainted
// TODO: test more variant expressions.
}

View File

@@ -12,15 +12,15 @@ module RegexTest implements TestSig {
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(RegexEval eval, Expr regex |
eval.getRegex() = regex and
location = regex.getLocation() and
element = regex.toString() and
eval.getARegex() = regex and
location = eval.getLocation() and
element = eval.toString() and
tag = "regex" and
value = quote(regex.toString())
)
or
exists(RegexEval eval, Expr input |
eval.getInput() = input and
eval.getStringInput() = input and
location = input.getLocation() and
element = input.toString() and
tag = "input" and

View File

@@ -96,42 +96,51 @@ class NSRegularExpression : NSObject {
// --- tests ---
func myRegexpMethodsTests() throws {
func myRegexpMethodsTests(b: Bool) throws {
let input = "abcdef"
let regex = try Regex(".*")
// --- Regex ---
_ = try regex.firstMatch(in: input) // $ regex=regex input=input
_ = try regex.prefixMatch(in: input) // $ regex=regex input=input
_ = try regex.wholeMatch(in: input) // $ regex=regex input=input
_ = try regex.firstMatch(in: input) // $ regex=.* input=input
_ = try regex.prefixMatch(in: input) // $ regex=.* input=input
_ = try regex.wholeMatch(in: input) // $ regex=.* input=input
// --- RangeReplaceableCollection ---
var inputVar = input
inputVar.replace(regex, with: "") // $ regex=regex input=&...
_ = input.replacing(regex, with: "") // $ regex=regex input=input
inputVar.trimPrefix(regex) // $ regex=regex input=&...
inputVar.replace(regex, with: "") // $ regex=.* input=&...
_ = input.replacing(regex, with: "") // $ regex=.* input=input
inputVar.trimPrefix(regex) // $ regex=.* input=&...
// --- StringProtocol ---
_ = input.range(of: ".*", options: .regularExpression, range: nil, locale: nil) // $ MISSING: regex=regex input=input
_ = input.replacingOccurrences(of: ".*", with: "", options: .regularExpression) // $ MISSING: regex=regex input=input
_ = input.range(of: ".*", options: .regularExpression, range: nil, locale: nil) // $ MISSING: regex=.* input=input
_ = input.replacingOccurrences(of: ".*", with: "", options: .regularExpression) // $ MISSING: regex=.* input=input
// --- NSRegularExpression ---
let nsregex = try NSRegularExpression(pattern: ".*")
_ = nsregex.numberOfMatches(in: input, options: [], range: NSRange(location: 0, length: input.utf16.count)) // $ regex=nsregex input=input
nsregex.enumerateMatches(in: input, range: NSMakeRange(0, input.utf16.count), using: {a, b, c in } ) // $ regex=nsregex input=input
_ = nsregex.matches(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=nsregex input=input
_ = nsregex.firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=nsregex input=input
_ = nsregex.rangeOfFirstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=nsregex input=input
_ = nsregex.replaceMatches(in: NSMutableString(string: input), range: NSMakeRange(0, input.utf16.count), withTemplate: "") // $ regex=nsregex input="call to NSString.init(string:)"
_ = nsregex.stringByReplacingMatches(in: input, range: NSMakeRange(0, input.utf16.count), withTemplate: "") // $ regex=nsregex input=input
_ = nsregex.numberOfMatches(in: input, options: [], range: NSRange(location: 0, length: input.utf16.count)) // $ regex=.* input=input
nsregex.enumerateMatches(in: input, range: NSMakeRange(0, input.utf16.count), using: {a, b, c in } ) // $ regex=.* input=input
_ = nsregex.matches(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input
_ = nsregex.firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input
_ = nsregex.rangeOfFirstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input
_ = nsregex.replaceMatches(in: NSMutableString(string: input), range: NSMakeRange(0, input.utf16.count), withTemplate: "") // $ regex=.* input="call to NSString.init(string:)"
_ = nsregex.stringByReplacingMatches(in: input, range: NSMakeRange(0, input.utf16.count), withTemplate: "") // $ regex=.* input=input
// --- NSString ---
let inputNS = NSString(string: "abcdef")
_ = inputNS.range(of: "*", options: .regularExpression) // $ MISSING: regex=nsregex input=inputNS
_ = inputNS.replacingOccurrences(of: ".*", with: "", options: .regularExpression, range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=nsregex input=inputNS
_ = inputNS.range(of: "*", options: .regularExpression) // $ MISSING: regex=* input=inputNS
_ = inputNS.replacingOccurrences(of: ".*", with: "", options: .regularExpression, range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=.* input=inputNS
// --- flow ---
let either_regex = try Regex(b ? ".*" : ".+")
_ = try either_regex.firstMatch(in: input) // $ regex=.* regex=.+ input=input
let base_str = "a"
let append_regex = try Regex(base_str + "b")
_ = try append_regex.firstMatch(in: input) // $ input=input MISSING: regex=ab
}