diff --git a/shared/regex/codeql/regex/MissingRegExpAnchor.qll b/shared/regex/codeql/regex/MissingRegExpAnchor.qll index e2e8b71ea75..c4fe642b790 100644 --- a/shared/regex/codeql/regex/MissingRegExpAnchor.qll +++ b/shared/regex/codeql/regex/MissingRegExpAnchor.qll @@ -15,6 +15,11 @@ import HostnameRegexp as HostnameShared signature module MissingRegExpAnchorSig< RegexTreeViewSig TreeImpl, HostnameShared::HostnameRegexpSig Specific> { + /** + * Holds if this regular expression is used in a 'replacement' operation, such + * as replacing all matches of the regular expression in the input string + * with another string. + */ predicate isUsedAsReplace(Specific::RegExpPatternSource pattern); /** Gets a string representation of an end anchor from a regular expression. */ diff --git a/swift/ql/lib/codeql/swift/regex/Regex.qll b/swift/ql/lib/codeql/swift/regex/Regex.qll index c24a5d692c0..aedf561047b 100644 --- a/swift/ql/lib/codeql/swift/regex/Regex.qll +++ b/swift/ql/lib/codeql/swift/regex/Regex.qll @@ -330,6 +330,13 @@ abstract class RegexEval extends CallExpr { */ DataFlow::Node getAnOptionsInput() { none() } + /** + * Holds if this regular expression evaluation is a 'replacement' operation, + * such as replacing all matches of the regular expression in the input + * string with another string. + */ + abstract predicate isUsedAsReplace(); + /** * Gets a regular expression value that is evaluated here (if any can be identified). */ @@ -416,6 +423,10 @@ private class AlwaysRegexEval extends RegexEval { override DataFlow::Node getRegexInputNode() { result = regexInput } override DataFlow::Node getStringInputNode() { result = stringInput } + + override predicate isUsedAsReplace() { + this.getStaticTarget().getName().matches(["replac%", "stringByReplac%", "trim%"]) + } } /** @@ -508,4 +519,6 @@ private class NSStringCompareOptionsRegexEval extends RegexEval instanceof NSStr override DataFlow::Node getAnOptionsInput() { result = this.(NSStringCompareOptionsPotentialRegexEval).getAnOptionsInput() } + + override predicate isUsedAsReplace() { this.getStaticTarget().getName().matches(["replac%"]) } } diff --git a/swift/ql/src/queries/Security/CWE-020/MissingRegexAnchor.ql b/swift/ql/src/queries/Security/CWE-020/MissingRegexAnchor.ql index fc1b9e1b227..be6360187f7 100644 --- a/swift/ql/src/queries/Security/CWE-020/MissingRegexAnchor.ql +++ b/swift/ql/src/queries/Security/CWE-020/MissingRegexAnchor.ql @@ -22,35 +22,10 @@ private module Impl implements MissingRegExpAnchor::MissingRegExpAnchorSig { predicate isUsedAsReplace(RegexPatternSource pattern) { - none() -/* java // is used for capture or replace - exists(DataFlow::MethodCallNode mcn, string name | name = mcn.getMethodName() | - name = "exec" and - mcn = pattern.getARegExpObject().getAMethodCall() and - exists(mcn.getAPropertyRead()) - or - exists(DataFlow::Node arg | - arg = mcn.getArgument(0) and - ( - pattern.getARegExpObject().flowsTo(arg) or - pattern.getAParse() = arg - ) - | - name = "replace" - or - name = "match" and exists(mcn.getAPropertyRead()) - ) - )*/ -/* rb exists(DataFlow::CallNode mcn, DataFlow::Node arg, string name | - name = mcn.getMethodName() and - arg = mcn.getArgument(0) - | - ( - pattern.getAParse().(DataFlow::LocalSourceNode).flowsTo(arg) or - pattern.getAParse() = arg - ) and - name = ["sub", "sub!", "gsub", "gsub!"] - )*/ + exists(RegexEval eval | + eval.getARegex() = pattern.asExpr() and + eval.isUsedAsReplace() + ) } string getEndAnchorText() { result = "$" } diff --git a/swift/ql/test/query-tests/Security/CWE-020/MissingRegexAnchor.expected b/swift/ql/test/query-tests/Security/CWE-020/MissingRegexAnchor.expected index 136f52fa21e..553d53846e6 100644 --- a/swift/ql/test/query-tests/Security/CWE-020/MissingRegexAnchor.expected +++ b/swift/ql/test/query-tests/Security/CWE-020/MissingRegexAnchor.expected @@ -35,7 +35,6 @@ | SemiAnchoredRegex.swift:104:16:104:16 | ^mouse\|touch\|click\|contextmenu\|drop\|dragover\|dragend | Misleading operator precedence. The subexpression '^mouse' is anchored at the beginning, but the other parts of this regular expression are not | | SemiAnchoredRegex.swift:105:16:105:16 | ^xxx:\|yyy: | Misleading operator precedence. The subexpression '^xxx:' is anchored at the beginning, but the other parts of this regular expression are not | | SemiAnchoredRegex.swift:106:16:106:16 | _xxx\|_yyy\|_zzz$ | Misleading operator precedence. The subexpression '_zzz$' is anchored at the end, but the other parts of this regular expression are not | -| SemiAnchoredRegex.swift:137:36:137:36 | ^a\|b/ | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not | | UnanchoredUrlRegex.swift:62:39:62:39 | https?://good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | | UnanchoredUrlRegex.swift:63:39:63:39 | https?://good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. | | UnanchoredUrlRegex.swift:64:39:64:39 | ^https?://good.com | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. | diff --git a/swift/ql/test/query-tests/Security/CWE-020/SemiAnchoredRegex.swift b/swift/ql/test/query-tests/Security/CWE-020/SemiAnchoredRegex.swift index f072c91ab69..3b0abe53048 100644 --- a/swift/ql/test/query-tests/Security/CWE-020/SemiAnchoredRegex.swift +++ b/swift/ql/test/query-tests/Security/CWE-020/SemiAnchoredRegex.swift @@ -134,5 +134,5 @@ func realWorld(input: String) throws { func replaceTest(x: String) -> String { // OK - possibly replacing too much, but not obviously a problem - return x.replacingOccurrences(of: #"^a|b/"#, with: "", options: .regularExpression) // [FALSE POSITIVE] + return x.replacingOccurrences(of: #"^a|b/"#, with: "", options: .regularExpression) }