JS: Sharpen recognition of string 'match' calls

This commit is contained in:
Asger Feldthaus
2021-06-15 09:32:37 +02:00
parent 0ebf53b9df
commit 5838e54a46
5 changed files with 55 additions and 22 deletions

View File

@@ -922,6 +922,36 @@ private predicate isNativeStringMethod(Function func, string name) {
)
}
/**
* Holds if `name` is the name of a property on a Match object returned by `String.prototype.match`,
* not including array indices.
*/
private predicate isMatchObjectProperty(string name) {
any(ExternalInstanceMemberDecl decl).hasQualifiedName("Array", name)
or
name in ["length", "index", "input", "groups"]
}
/** Holds if `call` is a call to `match` whose result is used in a way that is incompatible with Match objects. */
private predicate isUsedAsNonMatchObject(DataFlow::MethodCallNode call) {
call.getMethodName() = "match" and
call.getNumArgument() = 1 and
(
// Accessing a property that is absent on Match objects
exists(string propName |
exists(call.getAPropertyRead(propName)) and
not isMatchObjectProperty(propName) and
not exists(propName.toInt())
)
or
// Awaiting the result
call.flowsToExpr(any(AwaitExpr await).getOperand())
or
// Result is obviously unused
call.asExpr() = any(ExprStmt stmt).getExpr()
)
}
/**
* Holds if `source` may be interpreted as a regular expression.
*/
@@ -939,7 +969,10 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
not isNativeStringMethod(func, methodName)
)
|
methodName = "match" and source = mce.getArgument(0) and mce.getNumArgument() = 1
methodName = "match" and
source = mce.getArgument(0) and
mce.getNumArgument() = 1 and
not isUsedAsNonMatchObject(mce)
or
methodName = "search" and
source = mce.getArgument(0) and

View File

@@ -5,7 +5,7 @@
| tst-IncompleteHostnameRegExp.js:7:3:7:30 | ^http:\\/\\/(.+).example.com\\/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:7:2:7:31 | /^http: ... .com\\// | here |
| tst-IncompleteHostnameRegExp.js:10:3:10:36 | ^http:\\/\\/test.example.com\\/(?:.*) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:10:2:10:37 | /^http: ... (?:.*)/ | here |
| tst-IncompleteHostnameRegExp.js:11:14:11:37 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here |
| tst-IncompleteHostnameRegExp.js:12:11:12:34 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here |
| tst-IncompleteHostnameRegExp.js:12:15:12:38 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:14:12:39 | "^http: ... le.com" | here |
| tst-IncompleteHostnameRegExp.js:15:23:15:46 | ^http://test.example.com | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:15:13:15:50 | id(id(i ... com"))) | here |
| tst-IncompleteHostnameRegExp.js:19:18:19:34 | ^test.example.com | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here |
| tst-IncompleteHostnameRegExp.js:22:28:22:44 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here |

View File

@@ -3,7 +3,7 @@
| tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-IncompleteHostnameRegExp.js:12:14:12:39 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-IncompleteHostnameRegExp.js:15:22:15:47 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-IncompleteHostnameRegExp.js:19:17:19:35 | '^test.example.com' | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
@@ -51,18 +51,18 @@
| tst-SemiAnchoredRegExp.js:96:2:96:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored at the beginning, but the other parts of this regular expression are not |
| tst-SemiAnchoredRegExp.js:97:2:97:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored at the beginning, but the other parts of this regular expression are not |
| tst-SemiAnchoredRegExp.js:98:2:98:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored at the end, but the other parts of this regular expression are not |
| tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:5:43:5:62 | "^https?://good.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:6:43:6:64 | /^https ... od.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:7:43:7:87 | "(^http ... 2.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:8:43:8:86 | "(https ... e.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:3:47:3:65 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:4:58:4:76 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:5:47:5:66 | "^https?://good.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:6:47:6:68 | /^https ... od.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:7:47:7:91 | "(^http ... 2.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:8:47:8:90 | "(https ... e.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
| tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:11:13:11:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:13:44:13:62 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:15:13:15:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:19:43:19:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:20:43:20:66 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:19:47:19:65 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:20:47:20:70 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:23:3:23:21 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:24:3:24:23 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
| tst-UnanchoredUrlRegExp.js:25:14:25:32 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |

View File

@@ -9,7 +9,7 @@
/^http:\/\/(?:.+)\\.test\\.example.com\//; // NOT OK
/^http:\/\/test.example.com\/(?:.*)/; // OK
new RegExp("^http://test.example.com"); // NOT OK
s.match("^http://test.example.com"); // NOT OK
if (s.match("^http://test.example.com")) {} // NOT OK
function id(e) { return e; }
new RegExp(id(id(id("^http://test.example.com")))); // NOT OK

View File

@@ -1,11 +1,11 @@
(function(x){
"http://evil.com/?http://good.com".match("https?://good.com"); // NOT OK
"http://evil.com/?http://good.com".match(new RegExp("https?://good.com")); // NOT OK
"http://evil.com/?http://good.com".match("^https?://good.com"); // NOT OK - missing post-anchor
"http://evil.com/?http://good.com".match(/^https?:\/\/good.com/); // NOT OK - missing post-anchor
"http://evil.com/?http://good.com".match("(^https?://good1.com)|(^https?://good2.com)"); // NOT OK - missing post-anchor
"http://evil.com/?http://good.com".match("(https?://good.com)|(^https?://goodie.com)"); // NOT OK - missing post-anchor
if ("http://evil.com/?http://good.com".match("https?://good.com")) {} // NOT OK
if ("http://evil.com/?http://good.com".match(new RegExp("https?://good.com"))) {} // NOT OK
if ("http://evil.com/?http://good.com".match("^https?://good.com")) {} // NOT OK - missing post-anchor
if ("http://evil.com/?http://good.com".match(/^https?:\/\/good.com/)) {} // NOT OK - missing post-anchor
if ("http://evil.com/?http://good.com".match("(^https?://good1.com)|(^https?://good2.com)")) {} // NOT OK - missing post-anchor
if ("http://evil.com/?http://good.com".match("(https?://good.com)|(^https?://goodie.com)")) {} // NOT OK - missing post-anchor
/https?:\/\/good.com/.exec("http://evil.com/?http://good.com"); // NOT OK
new RegExp("https?://good.com").exec("http://evil.com/?http://good.com"); // NOT OK
@@ -14,10 +14,10 @@
new RegExp("https?://good.com").test("http://evil.com/?http://good.com"); // NOT OK
"something".match("other"); // OK
"something".match("x.commissary"); // OK
"http://evil.com/?http://good.com".match("https?://good.com"); // NOT OK
"http://evil.com/?http://good.com".match("https?://good.com:8080"); // NOT OK
if ("something".match("other")) {} // OK
if ("something".match("x.commissary")) {} // OK
if ("http://evil.com/?http://good.com".match("https?://good.com")) {} // NOT OK
if ("http://evil.com/?http://good.com".match("https?://good.com:8080")) {} // NOT OK
let trustedUrls = [
"https?://good.com", // NOT OK, referenced below