Merge pull request #13283 from asgerf/js/restrict-regex-search-function

JS: Be more conservative about flagging "search" call arguments as regex
This commit is contained in:
Asger F
2023-06-08 10:50:51 +02:00
committed by GitHub
7 changed files with 58 additions and 36 deletions

View File

@@ -958,6 +958,27 @@ private predicate isUsedAsNonMatchObject(DataFlow::MethodCallNode call) {
)
}
/**
 * Holds if `value` is used in a way that suggests it is a number.
 *
 * This is a syntactic heuristic. It holds when `value` reaches any of:
 * - an operand of a comparison whose other operand may be inferred to be a number,
 * - an operand of an arithmetic expression,
 * - the operand of a unary `-`,
 * - the property-name expression of an index expression, or
 * - an argument of a call whose callee name is one of a fixed list of
 *   index-taking method names (`substring`, `charAt`, ...).
 */
pragma[inline]
private predicate isUsedAsNumber(DataFlow::LocalSourceNode value) {
  // Compared against something the type inference considers a possible number.
  exists(Comparison cmp, Expr numeric |
    numeric.analyze().getAType() = TTNumber() and
    cmp.hasOperands(value.getALocalUse().asExpr(), numeric)
  )
  or
  // Operand of an arithmetic expression.
  exists(ArithmeticExpr arith | value.flowsToExpr(arith.getAnOperand()))
  or
  // Operand of a unary minus.
  exists(UnaryExpr negation |
    negation.getOperator() = "-" and
    value.flowsToExpr(negation.getOperand())
  )
  or
  // Used as the index in `obj[index]`.
  exists(IndexExpr indexing | value.flowsToExpr(indexing.getPropertyNameExpr()))
  or
  // Passed to a call whose callee name suggests it takes an index or offset.
  exists(DataFlow::CallNode indexCall |
    value.flowsTo(indexCall.getAnArgument()) and
    indexCall.getCalleeName() =
      ["substring", "substr", "slice", "splice", "charAt", "charCodeAt", "codePointAt"]
  )
}
/**
* Holds if `source` may be interpreted as a regular expression.
*/
@@ -985,9 +1006,9 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
methodName = "search" and
source = mce.getArgument(0) and
mce.getNumArgument() = 1 and
// "search" is a common method name, and so we exclude chained accesses
// because `String.prototype.search` returns a number
not exists(PropAccess p | p.getBase() = mce.getEnclosingExpr())
// "search" is a common method name, and the built-in "search" method is rarely used,
// so to reduce FPs we also require that the return value appears to be used as a number.
isUsedAsNumber(mce)
)
or
exists(DataFlow::SourceNode schema | schema = JsonSchema::getAPartOfJsonSchema() |

View File

@@ -0,0 +1,6 @@
---
category: minorAnalysis
---
* Fixed an issue where calls to a method named `search` would lead to false positive alerts related to regular expressions.
This happened when the call was incorrectly seen as a call to `String.prototype.search`, since this function converts its first argument
to a regular expression. The analysis now additionally requires that the result of a `search` call appears to be used as a number before treating its argument as a regular expression sink.

View File

@@ -55,23 +55,23 @@ function emptyAlt3(x) {
}
function search(x) {
return x.search(/[a-z]*/); // NOT OK
return x.search(/[a-z]*/) > -1; // NOT OK
}
function search2(x) {
return x.search(/[a-z]/); // OK
return x.search(/[a-z]/) > -1; // OK
}
function lookahead(x) {
return x.search(/(?!x)/); // OK
return x.search(/(?!x)/) > -1; // OK
}
function searchPrefix(x) {
return x.search(/^(foo)?/); // NOT OK - `foo?` does not affect the returned index
return x.search(/^(foo)?/) > -1; // NOT OK - `foo?` does not affect the returned index
}
function searchSuffix(x) {
return x.search(/(foo)?$/); // OK - `foo?` affects the returned index
return x.search(/(foo)?$/) > -1; // OK - `foo?` affects the returned index
}
function wordBoundary(x) {

View File

@@ -49,7 +49,7 @@
| tst-UnanchoredUrlRegExp.js:8:47:8:90 | "(https ... e.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:11:13:11:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:13:44:13:62 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:13:48:13:66 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:15:13:15:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:19:47:19:65 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:20:47:20:70 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |

View File

@@ -10,7 +10,7 @@
/https?:\/\/good.com/.exec("http://evil.com/?http://good.com"); // NOT OK
new RegExp("https?://good.com").exec("http://evil.com/?http://good.com"); // NOT OK
"http://evil.com/?http://good.com".search("https?://good.com"); // NOT OK
if ("http://evil.com/?http://good.com".search("https?://good.com") > -1) {} // NOT OK
new RegExp("https?://good.com").test("http://evil.com/?http://good.com"); // NOT OK

View File

@@ -31,14 +31,12 @@ nodes
| RegExpInjection.js:41:26:41:30 | input |
| RegExpInjection.js:42:25:42:29 | input |
| RegExpInjection.js:42:25:42:29 | input |
| RegExpInjection.js:45:20:45:24 | input |
| RegExpInjection.js:45:20:45:24 | input |
| RegExpInjection.js:46:23:46:27 | input |
| RegExpInjection.js:46:23:46:27 | input |
| RegExpInjection.js:47:22:47:26 | input |
| RegExpInjection.js:47:22:47:26 | input |
| RegExpInjection.js:50:46:50:50 | input |
| RegExpInjection.js:50:46:50:50 | input |
| RegExpInjection.js:45:24:45:28 | input |
| RegExpInjection.js:45:24:45:28 | input |
| RegExpInjection.js:46:27:46:31 | input |
| RegExpInjection.js:46:27:46:31 | input |
| RegExpInjection.js:47:26:47:30 | input |
| RegExpInjection.js:47:26:47:30 | input |
| RegExpInjection.js:54:14:54:16 | key |
| RegExpInjection.js:54:14:54:27 | key.split(".") |
| RegExpInjection.js:54:14:54:42 | key.spl ... x => x) |
@@ -89,14 +87,12 @@ edges
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:41:26:41:30 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:42:25:42:29 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:42:25:42:29 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:45:20:45:24 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:45:20:45:24 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:46:23:46:27 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:46:23:46:27 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:47:22:47:26 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:47:22:47:26 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:50:46:50:50 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:50:46:50:50 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:45:24:45:28 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:45:24:45:28 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:46:27:46:31 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:46:27:46:31 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:47:26:47:30 | input |
| RegExpInjection.js:5:31:5:56 | input | RegExpInjection.js:47:26:47:30 | input |
| RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:5:31:5:56 | input |
| RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:5:31:5:56 | input |
| RegExpInjection.js:8:31:8:33 | key | RegExpInjection.js:8:23:8:45 | "\\\\b" + ... (.*)\\n" |
@@ -157,10 +153,9 @@ edges
| RegExpInjection.js:40:23:40:27 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:40:23:40:27 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:41:26:41:30 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:41:26:41:30 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:42:25:42:29 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:42:25:42:29 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:45:20:45:24 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:45:20:45:24 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:46:23:46:27 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:46:23:46:27 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:47:22:47:26 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:47:22:47:26 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:50:46:50:50 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:50:46:50:50 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:45:24:45:28 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:45:24:45:28 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:46:27:46:31 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:46:27:46:31 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:47:26:47:30 | input | RegExpInjection.js:5:39:5:56 | req.param("input") | RegExpInjection.js:47:26:47:30 | input | This regular expression is constructed from a $@. | RegExpInjection.js:5:39:5:56 | req.param("input") | user-provided value |
| RegExpInjection.js:54:14:54:52 | key.spl ... in("-") | RegExpInjection.js:5:13:5:28 | req.param("key") | RegExpInjection.js:54:14:54:52 | key.spl ... in("-") | This regular expression is constructed from a $@. | RegExpInjection.js:5:13:5:28 | req.param("key") | user-provided value |
| RegExpInjection.js:64:14:64:18 | input | RegExpInjection.js:60:39:60:56 | req.param("input") | RegExpInjection.js:64:14:64:18 | input | This regular expression is constructed from a $@. | RegExpInjection.js:60:39:60:56 | req.param("input") | user-provided value |
| RegExpInjection.js:87:14:87:55 | "^.*\\.( ... + ")$" | RegExpInjection.js:82:15:82:32 | req.param("input") | RegExpInjection.js:87:14:87:55 | "^.*\\.( ... + ")$" | This regular expression is constructed from a $@. | RegExpInjection.js:82:15:82:32 | req.param("input") | user-provided value |

View File

@@ -42,12 +42,12 @@ app.get('/findKey', function(req, res) {
if (maybeString.match(input)) {} // NOT OK
if (notString.match(input)) {} // OK
defString.search(input); // NOT OK
likelyString.search(input); // NOT OK
maybeString.search(input); // NOT OK
notString.search(input); // OK
if (defString.search(input) > -1) {} // NOT OK
if (likelyString.search(input) > -1) {} // NOT OK
if (maybeString.search(input) > -1) {} // NOT OK
if (notString.search(input) > -1) {} // OK
URI(`${protocol}://${host}${path}`).search(input); // OK, but still flagged [INCONSISTENCY]
URI(`${protocol}://${host}${path}`).search(input); // OK
URI(`${protocol}://${host}${path}`).search(input).href(); // OK
unknown.search(input).unknown; // OK
@@ -62,7 +62,7 @@ app.get('/findKey', function(req, res) {
Search.search(input); // OK!
new RegExp(input); // NOT OK
var sanitized = input.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
new RegExp(sanitized); // OK
});