JS: add query js/regex/missing-regexp-anchor

2025-12-16 16:53:25 +01:00 · 2019-05-31 08:45:20 +02:00
parent 69db54a03a
commit 0fa73b8331
11 changed files with 460 additions and 0 deletions
--- a/change-notes/1.21/analysis-javascript.md
+++ b/change-notes/1.21/analysis-javascript.md
@@ -27,6 +27,7 @@

 | **Query**                                     | **Tags**                                             | **Purpose**                                                                                                                                                                 |
 |-----------------------------------------------|------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Missing regular expression anchor (`js/regex/missing-regexp-anchor`) | correctness, security, external/cwe/cwe-20 | Highlights regular expression patterns that may be missing an anchor, indicating a possible violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are not shown on LGTM by default. |
 | Prototype pollution (`js/prototype-pollution`)    | security, external/cwe-250, external/cwe-400 | Highlights code that allows an attacker to modify a built-in prototype object through an unsanitized recursive merge function. The results are shown on LGTM by default. |

 ## Changes to existing queries
--- a/javascript/config/suites/javascript/security
+++ b/javascript/config/suites/javascript/security
@@ -3,6 +3,7 @@
 + semmlecode-javascript-queries/Security/CWE-020/IncompleteHostnameRegExp.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-020/MissingRegExpAnchor.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022
 + semmlecode-javascript-queries/Security/CWE-022/ZipSlip.ql: /Security/CWE/CWE-022
 + semmlecode-javascript-queries/Security/CWE-078/CommandInjection.ql: /Security/CWE/CWE-078
--- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.qhelp
@@ -0,0 +1,77 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+
+	<overview>
+		<p>
+
+			Sanitizing untrusted input with regular expressions is a
+			common technique.  However, it is error prone to match untrusted input
+			against regular expressions without anchors such as <code>^</code> or
+			<code>$</code>.  Malicious input can bypass such security checks by
+			embedding one of the allowed patterns in an unexpected location.
+
+		</p>
+
+		<p>
+
+			Even if the matching is not done in a security-critical
+			context, it may still cause undesirable behaviors when the regular
+			expression matches accidentally.
+
+		</p>
+	</overview>
+
+	<recommendation>
+		<p>
+
+			Use anchors to ensure that regular expressions match at
+			the expected locations.
+
+		</p>
+	</recommendation>
+
+	<example>
+
+		<p>
+
+			The following example code checks that a URL redirection
+			will reach the <code>example.com</code> domain, or one of its
+			subdomains, and not some malicious site.
+
+		</p>
+
+		<sample src="examples/MissingRegExpAnchor_BAD.js"/>
+
+		<p>
+
+			The check with the regular expression match is, however, easy to bypass, for example
+			by embedding <code>http://www.example.com/</code> in the path component:
+			<code>http://evil-example.net/http://www.example.com/</code>, or in the query
+			string component: <code>http://evil-example.net/?x=http://www.example.com/</code>.
+
+			Address these shortcomings by using anchors in the regular expression instead:
+
+		</p>
+
+		<sample src="examples/MissingRegExpAnchor_GOOD.js"/>
+
+		<p>
+
+			A related mistake is to write a regular expression with
+			multiple alternatives, but to only include an anchor for one of the
+			alternatives. As an example, the regular expression
+			<code>/^www\.example\.com|beta\.example\.com/</code> will match the host
+			<code>evil.beta.example.com</code> because the regular expression is parsed
+			as <code>/(^www\.example\.com)|(beta\.example\.com)/</code>.
+
+		</p>
+	</example>
+
+	<references>
+		<li>MDN: <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions">Regular Expressions</a></li>
+		<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
+		<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">Unvalidated Redirects and Forwards Cheat Sheet</a></li>
+	</references>
+</qhelp>
--- a/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
+++ b/javascript/ql/src/Security/CWE-020/MissingRegExpAnchor.ql
@@ -0,0 +1,86 @@
+/**
+ * @name Missing regular expression anchor
+ * @description Regular expressions without anchors can be vulnerable to bypassing.
+ * @kind problem
+ * @problem.severity warning
+ * @precision medium
+ * @id js/regex/missing-regexp-anchor
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-20
+ */
+
+import javascript
+
+/**
+ * Holds if `src` is a pattern for a collection of alternatives where
+ * only the first or last alternative is anchored, indicating a
+ * precedence mistake explained by `msg`.
+ *
+ * The canonical example of such a mistake is: `^a|b|c`, which is
+ * parsed as `(^a)|(b)|(c)`.
+ */
+predicate isAnInterestingSemiAnchoredRegExpString(RegExpPatternSource src, string msg) {
+  exists(string str, string maybeGroupedStr, string regex, string anchorPart, string posString, string escapedDot |
+    // a dot that might be escaped in a regular expression, for example `/\./` or `new RegExp('\\.')`
+    escapedDot = "\\\\\\\\?[.]" and
+    // a string that is mostly free from special regular expression symbols
+    str = "(?:(?:" + escapedDot + ")|[a-z:/.?_,@0-9 -])+" and
+    // the string may be wrapped in parentheses
+    maybeGroupedStr = "(?:" + str + "|\\(" + str + "\\))" and
+    (
+      // a problematic pattern: `^a|b|...|x`
+      regex = "(?i)(\\^" + maybeGroupedStr + ")(?:\\|" + maybeGroupedStr + ")+" and
+      posString = "beginning"
+      or
+      // a problematic pattern: `a|b|...|x$`
+      regex = "(?i)(?:" + maybeGroupedStr + "\\|)+(" + maybeGroupedStr + "\\$)" and
+      posString = "end"
+    ) and
+    anchorPart = src.getPattern().regexpCapture(regex, 1) and
+    anchorPart.regexpMatch("(?i).*[a-z].*") and // only report anchored alternatives that contain a letter
+    msg = "The alternative '" + anchorPart + "' uses an anchor to match from the " + posString +
+        " of a string, but the other alternatives of this regular expression do not use anchors."
+  )
+}
+
+/**
+ * Holds if `src` is an unanchored pattern for a URL, indicating a
+ * mistake explained by `msg`.
+ */
+predicate isAnInterestingUnanchoredRegExpString(RegExpPatternSource src, string msg) {
+  exists(string pattern | pattern = src.getPattern() |
+    // a protocol and subdomains, perhaps with some regex characters mixed in, then a known TLD and an optional non-whitespace suffix starting with one of `/#?():`
+    pattern
+        .regexpMatch("(?i)[():|?a-z0-9-\\\\./]+[.]" + RegExpPatterns::commonTLD() +
+            "([/#?():]\\S*)?") and
+    // without any anchors (no `^` or `$` character anywhere in the pattern)
+    pattern.regexpMatch("[^$^]+") and
+    // that is not used for capture or replace, i.e. no property of an `exec`/`match` result is read and the pattern is not the first argument of `replace`
+    not exists(DataFlow::MethodCallNode mcn, string name | name = mcn.getMethodName() |
+      name = "exec" and
+      mcn = src.getARegExpObject().getAMethodCall() and
+      exists(mcn.getAPropertyRead())
+      or
+      exists(DataFlow::Node arg |
+        arg = mcn.getArgument(0) and
+        (
+          src.getARegExpObject().flowsTo(arg) or
+          src.(StringRegExpPatternSource).getAUse() = arg
+        )
+      |
+        name = "replace"
+        or
+        name = "match" and exists(mcn.getAPropertyRead())
+      )
+    ) and
+    msg = "When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it."
+  )
+}
+
+from DataFlow::Node nd, string msg
+where
+  isAnInterestingUnanchoredRegExpString(nd, msg)
+  or
+  isAnInterestingSemiAnchoredRegExpString(nd, msg)
+select nd, msg
--- a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js
+++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_BAD.js
@@ -0,0 +1,7 @@
+app.get('/some/path', function(req, res) {
+    let url = req.param("url");
+    // BAD: the host of `url` may be controlled by an attacker
+	if (url.match(/https?:\/\/www\.example\.com\//)) {
+        res.redirect(url);
+    }
+});
--- a/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js
+++ b/javascript/ql/src/Security/CWE-020/examples/MissingRegExpAnchor_GOOD.js
@@ -0,0 +1,7 @@
+app.get('/some/path', function(req, res) {
+    let url = req.param("url");
+    // GOOD: the host of `url` can not be controlled by an attacker
+	if (url.match(/^https?:\/\/www\.example\.com\//)) {
+        res.redirect(url);
+    }
+});
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected
@@ -22,3 +22,6 @@
 | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:48:41:48:68 | '^https ... e\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:68 | '^http: ... e\\.com' | here |
 | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
+| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | This regular expression has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | here |
+| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | here |
+| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | This string, which is used as a regular expression $@, has an unescaped '.' before 'com\|better.com', so it might match more hosts than expected. | tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | here |
--- a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected
@@ -0,0 +1,49 @@
+| tst-SemiAnchoredRegExp.js:3:2:3:7 | /^a\|b/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:6:2:6:9 | /^a\|b\|c/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:12:2:12:9 | /^a\|(b)/ | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:14:2:14:11 | /^(a)\|(b)/ | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:17:2:17:7 | /a\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:20:2:20:9 | /a\|b\|c$/ | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:26:2:26:9 | /(a)\|b$/ | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:28:2:28:11 | /(a)\|(b)$/ | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:30:2:30:23 | /^good. ... er.com/ | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:31:2:31:25 | /^good\\ ... r\\.com/ | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:32:2:32:27 | /^good\\ ... \\\\.com/ | The alternative '^good\\\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:37:13:37:18 | "^a\|b" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:40:13:40:20 | "^a\|b\|c" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:46:13:46:20 | "^a\|(b)" | The alternative '^a' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:48:13:48:22 | "^(a)\|(b)" | The alternative '^(a)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:51:13:51:18 | "a\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:54:13:54:20 | "a\|b\|c$" | The alternative 'c$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:60:13:60:20 | "(a)\|b$" | The alternative 'b$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:62:13:62:22 | "(a)\|(b)$" | The alternative '(b)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:64:13:64:34 | '^good. ... er.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:65:13:65:36 | '^good\\ ... r\\.com' | The alternative '^good.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:66:13:66:38 | '^good\\ ... \\\\.com' | The alternative '^good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:75:2:75:27 | /(\\.xxx ... .zzz)$/ | The alternative '(\\.zzz)$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:77:2:77:23 | /\\.xxx\| ... zzz$/ig | The alternative '\\.zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:78:2:78:19 | /\\.xxx\|\\.yyy\|zzz$/ | The alternative 'zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:81:2:81:28 | /^(xxx  ...  yyy)/i | The alternative '^(xxx yyy zzz)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:83:2:83:24 | /^(xxx: ... (zzz:)/ | The alternative '^(xxx:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:84:2:84:23 | /^(xxx? ... zzz\\/)/ | The alternative '^(xxx?:)' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:85:2:85:16 | /^@media\|@page/ | The alternative '^@media' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:87:2:87:21 | /^click\|mouse\|touch/ | The alternative '^click' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:88:2:88:43 | /^http: ... r\\.com/ | The alternative '^http://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:89:2:89:47 | /^https ... r\\.com/ | The alternative '^https?://good\\.com' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:90:2:90:55 | /^mouse ... ragend/ | The alternative '^mouse' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:91:2:91:14 | /^xxx:\|yyy:/i | The alternative '^xxx:' uses an anchor to match from the beginning of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-SemiAnchoredRegExp.js:92:2:92:18 | /_xxx\|_yyy\|_zzz$/ | The alternative '_zzz$' uses an anchor to match from the end of a string, but the other alternatives of this regular expression do not use anchors. |
+| tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:11:13:11:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:13:44:13:62 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:15:13:15:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:19:43:19:62 | "https?://good.com/" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:20:43:20:66 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:23:3:23:21 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:24:3:24:23 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:25:14:25:32 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:35:2:35:32 | /https? ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:49:11:49:51 | /youtub ... -_]+)/i | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
+| tst-UnanchoredUrlRegExp.js:77:11:77:32 | /vimeo\\ ... 0-9]+)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
--- a/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref
+++ b/javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.qlref
@@ -0,0 +1 @@
+Security/CWE-020/MissingRegExpAnchor.ql
--- a/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js
+++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-SemiAnchoredRegExp.js
@@ -0,0 +1,122 @@
+(function coreRegExp() {
+	/^a|/;
+	/^a|b/; // NOT OK
+	/a|^b/;
+	/^a|^b/;
+	/^a|b|c/; // NOT OK
+	/a|^b|c/;
+	/a|b|^c/;
+	/^a|^b|c/;
+
+	/(^a)|b/;
+	/^a|(b)/; // NOT OK
+	/^a|(^b)/;
+	/^(a)|(b)/; // NOT OK
+
+
+	/a|b$/; // NOT OK
+	/a$|b/;
+	/a$|b$/;
+	/a|b|c$/; // NOT OK
+	/a|b$|c/;
+	/a$|b|c/;
+	/a|b$|c$/;
+
+	/a|(b$)/;
+	/(a)|b$/; // NOT OK
+	/(a$)|b$/;
+	/(a)|(b)$/; // NOT OK
+
+	/^good.com|better.com/; // NOT OK
+	/^good\.com|better\.com/; // NOT OK
+	/^good\\.com|better\\.com/; // NOT OK
+});
+
+(function coreString() {
+	new RegExp("^a|");
+	new RegExp("^a|b"); // NOT OK
+	new RegExp("a|^b");
+	new RegExp("^a|^b");
+	new RegExp("^a|b|c"); // NOT OK
+	new RegExp("a|^b|c");
+	new RegExp("a|b|^c");
+	new RegExp("^a|^b|c");
+
+	new RegExp("(^a)|b");
+	new RegExp("^a|(b)"); // NOT OK
+	new RegExp("^a|(^b)");
+	new RegExp("^(a)|(b)"); // NOT OK
+
+
+	new RegExp("a|b$"); // NOT OK
+	new RegExp("a$|b");
+	new RegExp("a$|b$");
+	new RegExp("a|b|c$"); // NOT OK
+	new RegExp("a|b$|c");
+	new RegExp("a$|b|c");
+	new RegExp("a|b$|c$");
+
+	new RegExp("a|(b$)");
+	new RegExp("(a)|b$"); // NOT OK
+	new RegExp("(a$)|b$");
+	new RegExp("(a)|(b)$"); // NOT OK
+
+	new RegExp('^good.com|better.com'); // NOT OK
+	new RegExp('^good\.com|better\.com'); // NOT OK
+	new RegExp('^good\\.com|better\\.com'); // NOT OK
+});
+
+(function realWorld() {
+	// real-world examples that have been anonymized a bit
+
+	/*
+	 * NOT OK: flagged
+	 */
+	/(\.xxx)|(\.yyy)|(\.zzz)$/;
+	/(^left|right|center)\sbottom$/; // not flagged at the moment due to multiple anchors
+	/\.xxx|\.yyy|\.zzz$/ig;
+	/\.xxx|\.yyy|zzz$/;
+	/^(?:mouse|contextmenu)|click/; // not flagged at the moment due to nested alternatives
+	/^([A-Z]|xxx[XY]$)/; // not flagged at the moment due to multiple anchors
+	/^(xxx yyy zzz)|(xxx yyy)/i;
+	/^(xxx yyy zzz)|(xxx yyy)|(1st( xxx)? yyy)|xxx|1st/i; // not flagged at the moment due to nested parens
+	/^(xxx:)|(yyy:)|(zzz:)/;
+	/^(xxx?:)|(yyy:zzz\/)/;
+	/^@media|@page/;
+	/^\s*(xxx?|yyy|zzz):|xxx:yyy\//; // not flagged at the moment due to quantifiers
+	/^click|mouse|touch/;
+	/^http:\/\/good\.com|http:\/\/better\.com/;
+	/^https?:\/\/good\.com|https?:\/\/better\.com/;
+	/^mouse|touch|click|contextmenu|drop|dragover|dragend/;
+	/^xxx:|yyy:/i;
+	/_xxx|_yyy|_zzz$/;
+	/em|%$/; // not flagged at the moment due to the anchor not being for letters
+
+	/*
+	 * MAYBE OK due to apparent complexity: not flagged
+	 */
+	/(?:^[#?]?|&)([^=&]+)(?:=([^&]*))?/g;
+	/(^\s*|;\s*)\*.*;/m;
+	/(^\s*|\[)(?:xxx|yyy_(?:xxx|yyy)|xxx|yyy(?:xxx|yyy)?|xxx|yyy)\b/m;
+	/\s\S| \t|\t |\s$/;
+	/\{[^}{]*\{|\}[^}{]*\}|\{[^}]*$/g;
+	/^((\+|\-)\s*\d\d\d\d)|((\+|\-)\d\d\:?\d\d)/;
+	/^(\/\/)|([a-z]+:(\/\/)?)/;
+	/^[=?!#%@$]|!(?=[:}])/;
+	/^[\[\]!:]|[<>]/;
+	/^for\b|\b(?:xxx|yyy)\b/i;
+	/^if\b|\b(?:xxx|yyy|zzz)\b/i;
+
+	/*
+	 * OK: not flagged
+	 */
+	/$^|only-match/g;
+	/(#.+)|#$/;
+	/(NaN| {2}|^$)/;
+	/[^\n]*(?:\n|[^\n]$)/g;
+	/^$|\/(?:xxx|yyy)zzz/i;
+	/^(\/|(xxx|yyy|zzz)$)/;
+	/^9$|27/;
+	/^\+|\s*/g;
+	/xxx_yyy=\w+|^$/;
+});
--- a/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js
+++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js
@@ -0,0 +1,106 @@
+(function(x){
+
+	"http://evil.com/?http://good.com".match("https?://good.com"); // NOT OK
+	"http://evil.com/?http://good.com".match(new RegExp("https?://good.com")); // NOT OK
+	"http://evil.com/?http://good.com".match("^https?://good.com"); // OK
+	"http://evil.com/?http://good.com".match(/^https?:\/\/good.com/); // OK
+	"http://evil.com/?http://good.com".match("(^https?://good1.com)|(^https?://good2.com)"); // OK
+	"http://evil.com/?http://good.com".match("(https?://good.com)|(^https?://goodie.com)"); // NOT OK, but not detected
+
+	/https?:\/\/good.com/.exec("http://evil.com/?http://good.com"); // NOT OK
+	new RegExp("https?://good.com").exec("http://evil.com/?http://good.com"); // NOT OK
+
+	"http://evil.com/?http://good.com".search("https?://good.com"); // NOT OK
+
+	new RegExp("https?://good.com").test("http://evil.com/?http://good.com"); // NOT OK
+
+	"something".match("other"); // OK
+	"something".match("x.commissary"); // OK
+	"http://evil.com/?http://good.com".match("https?://good.com/"); // NOT OK
+	"http://evil.com/?http://good.com".match("https?://good.com:8080"); // NOT OK
+
+	let trustedUrls = [
+		"https?://good.com", // NOT OK, referenced below
+		/https?:\/\/good.com/, // NOT OK, referenced below
+		new RegExp("https?://good.com"), // NOT OK, referenced below
+		"^https?://good.com"
+	];
+	function isTrustedUrl(url) {
+		for (let trustedUrl of trustedUrls) {
+			if (url.match(trustedUrl)) return true;
+		}
+		return false;
+	}
+
+	/https?:\/\/good.com\/([0-9]+)/.exec(url); // NOT OK
+	"https://verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]; // OK
+	"http" + (secure? "s": "") + "://" + "verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]; // OK
+	"http" + (secure? "s": "") + "://" + ("verygood.com/?id=" + /https?:\/\/good.com\/([0-9]+)/.exec(url)[0]); // OK
+
+	// g or .replace?
+	file = file.replace(
+		/https:\/\/cdn\.ampproject\.org\/v0\/amp-story-0\.1\.js/g,
+		hostName + '/dist/v0/amp-story-1.0.max.js'
+	);
+
+	// missing context of use
+	const urlPatterns  = [
+		{
+			regex: /youtube.com\/embed\/([a-z0-9\?&=\-_]+)/i,
+			type: 'iframe', w: 560, h: 314,
+			url: '//www.youtube.com/embed/$1',
+			allowFullscreen: true
+		}];
+
+	// ditto
+	F.helpers.media = {
+		defaults : {
+			youtube : {
+				matcher : /(youtube\.com|youtu\.be)\/(watch\?v=|v\/|u\/|embed\/?)?(videoseries\?list=(.*)|[\w-]{11}|\?listType=(.*)&list=(.*)).*/i,
+				params  : {
+					autoplay    : 1,
+					autohide    : 1,
+					fs          : 1,
+					rel         : 0,
+					hd          : 1,
+					wmode       : 'opaque',
+					enablejsapi : 1
+				},
+				type : 'iframe',
+				url  : '//www.youtube.com/embed/$3'
+			}}}
+
+	// ditto
+	var urlPatterns = [
+		{regex: /youtu\.be\/([\w\-.]+)/, type: 'iframe', w: 425, h: 350, url: '//www.youtube.com/embed/$1'},
+		{regex: /youtube\.com(.+)v=([^&]+)/, type: 'iframe', w: 425, h: 350, url: '//www.youtube.com/embed/$2'},
+		{regex: /vimeo\.com\/([0-9]+)/, type: 'iframe', w: 425, h: 350, url: '//player.vimeo.com/video/$1?title=0&byline=0&portrait=0&color=8dc7dc'},
+	];
+
+	// check optional successor to TLD
+	new RegExp("(Pingdom.com_bot_version_)(\\d+)\\.(\\d+)")
+
+	// replace and spaces
+	error.replace(/See https:\/\/github\.com\/Squirrel\/Squirrel\.Mac\/issues\/182 for more information/, 'See [this link](https://github.com/Microsoft/vscode/issues/7426#issuecomment-425093469) for more information');
+
+	// not a url
+	var sharedScript = /<script\s.*src="(app:\/\/.+\.gaiamobile\.org)?\/?(shared\/.+)".*>/;
+
+	// replace
+	const repo = repoURL.replace(/http(s)?:\/\/(\d+\.)?github.com\//gi, '')
+
+	// replace and space
+	cmp.replace(/<option value="http:\/\/codemirror.net\/">HEAD<\/option>/,
+	            "<option value=\"http://codemirror.net/\">HEAD</option>\n        <option value=\"http://marijnhaverbeke.nl/git/codemirror?a=blob_plain;hb=" + number + ";f=\">" + number + "</option>");
+
+	// replace and space
+	const helpMsg = /For help see https:\/\/nodejs.org\/en\/docs\/inspector\s*/;
+	msg = msg.replace(helpMsg, '');
+
+	// not a url
+	pkg.source.match(/<a:skin.*?\s+xmlns:a="http:\/\/ajax.org\/2005\/aml"/m)
+
+	// replace
+	path.replace(/engine.io/, "$&-client")
+
+});