JS: add query js/incomplete-url-substring-sanitization

2025-12-17 01:03:14 +01:00 · 2018-12-03 14:52:31 +01:00
parent 3c00d4be6d
commit 229eea00dc
9 changed files with 235 additions and 0 deletions
--- a/javascript/config/suites/javascript/security
+++ b/javascript/config/suites/javascript/security
@@ -1,5 +1,6 @@
 + semmlecode-javascript-queries/DOM/TargetBlank.ql: /Security/CWE/CWE-200
 + semmlecode-javascript-queries/Electron/EnablingNodeIntegration.ql: /Security/CWE/CWE-094
+ semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022
 + semmlecode-javascript-queries/Security/CWE-078/CommandInjection.ql: /Security/CWE/CWE-078
--- a/javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qhelp
+++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qhelp
@@ -0,0 +1,88 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+
+	<overview>
+		<p>
+
+			Sanitizing untrusted URLs is an important technique for
+			preventing attacks such as request forgeries and malicious
+			redirections. Usually, this is done by checking that the host of a URL
+			is in a set of allowed hosts.
+
+		</p>
+
+		<p>
+
+			However, it is notoriously error-prone to treat the URL as
+			a string and check if one of the allowed hosts is a substring of the
+			URL. Malicious URLs can bypass such security checks by embedding one
+			of the allowed hosts in an unexpected location.
+
+		</p>
+
+		<p>
+
+			Even if the substring check is not used in a
+			security-critical context, the incomplete check may still cause
+			undesirable behaviors when the check succeeds accidentally.
+
+		</p>
+	</overview>
+
+	<recommendation>
+		<p>
+
+			Parse a URL before performing a check on its host value,
+			and ensure that the check handles arbitrary subdomain sequences
+			correctly.
+
+		</p>
+	</recommendation>
+
+	<example>
+
+		<p>
+
+			The following example code checks that a URL redirection
+			will reach the <code>example.com</code> domain, or one of its
+			subdomains, and not some malicious site.
+
+		</p>
+
+		<sample src="examples/IncompleteUrlSubstringSanitization_BAD1.js"/>
+
+		<p>
+
+			The substring check is, however, easy to bypass. For example
+			by embedding <code>example.com</code> in the path component:
+			<code>http://evil-example.net/example.com</code>, or in the query
+			string component: <code>http://evil-example.net/?x=example.com</code>.
+
+			Address these shortcomings by checking the host of the parsed URL instead:
+
+		</p>
+
+		<sample src="examples/IncompleteUrlSubstringSanitization_BAD2.js"/>
+
+		<p>
+
+			This is still not a sufficient check as the
+			following URLs bypass it: <code>http://evil-example.com</code>
+			<code>http://example.com.evil-example.net</code>.
+
+			Instead, use an explicit whitelist of allowed hosts to
+			make the redirect secure:
+
+		</p>
+
+		<sample src="examples/IncompleteUrlSubstringSanitization_GOOD.js"/>
+
+	</example>
+
+	<references>
+		<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
+		<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
+	</references>
+</qhelp>
--- a/javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql
+++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql
@@ -0,0 +1,54 @@
+/**
+ * @name Incomplete URL substring sanitization
+ * @description Security checks on the substrings of an unparsed URL are often vulnerable to bypassing.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id js/incomplete-url-substring-sanitization
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-20
+ */
+
+import javascript
+private import semmle.javascript.dataflow.InferredTypes
+
+from DataFlow::MethodCallNode call, string name, DataFlow::Node substring, string target
+where
+      (name = "indexOf" or name = "includes" or name = "startsWith" or name = "endsWith") and
+      call.getMethodName() = name and
+      substring = call.getArgument(0) and
+      substring.mayHaveStringValue(target) and
+      // target contains a domain on a common TLD, and perhaps some other URL components
+      target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(com|org|edu|gov|uk|net)(:[0-9]+)?/?") and
+      // whitelist
+      not (
+        name = "indexOf" and
+        (
+          // arithmetic on the indexOf-result
+          any(ArithmeticExpr e).getAnOperand().getUnderlyingValue() = call.asExpr()
+          or
+          // non-trivial position check on the indexOf-result
+          exists(EqualityTest test, Expr n |
+            test.hasOperands(call.asExpr(), n) |
+            not n.getIntValue() = [-1..0]
+          )
+        )
+        or
+        // the leading dot in a subdomain sequence makes the suffix-check safe (if it is performed on the host of the url)
+        name = "endsWith" and
+        target.regexpMatch("(?i)\\.([a-z0-9-]+)(\\.[a-z0-9-]+)+")
+        or
+        // the trailing slash makes the prefix-check safe
+        (
+          name = "startsWith"
+          or
+          name = "indexOf" and
+          exists(EqualityTest test, Expr n |
+            test.hasOperands(call.asExpr(), n) and
+            n.getIntValue() = 0
+          )
+        ) and
+        target.regexpMatch(".*/")
+      )
+select call, "'$@' may be at an arbitrary position in the sanitized URL.", substring, target
--- a/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization_BAD1.js
+++ b/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization_BAD1.js
@@ -0,0 +1,7 @@
+app.get('/some/path', function(req, res) {
+    let url = req.param("url");
+    // BAD: the host of `url` may be controlled by an attacker
+    if (url.includes("example.com")) {
+        res.redirect(url);
+    }
+});
--- a/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization_BAD2.js
+++ b/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization_BAD2.js
@@ -0,0 +1,8 @@
+app.get('/some/path', function(req, res) {
+    let url = req.param("url"),
+        host = urlLib.parse(url).host;
+    // BAD: the host of `url` may be controlled by an attacker
+    if (host.includes("example.com")) {
+        res.redirect(url);
+    }
+});
--- a/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization_GOOD.js
+++ b/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSubstringSanitization_GOOD.js
@@ -0,0 +1,13 @@
+app.get('/some/path', function(req, res) {
+    let url = req.param('url'),
+        host = urlLib.parse(url).host;
+    // GOOD: the host of `url` can not be controlled by an attacker
+    let allowedHosts = [
+        'example.com',
+        'beta.example.com',
+        'www.example.com'
+    ];
+    if (allowedHosts.includes(host)) {
+        res.redirect(url);
+    }
+});
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.expected
@@ -0,0 +1,12 @@
+| tst-IncompleteUrlSubstringSanitization.js:4:5:4:27 | x.index ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:4:15:4:26 | "secure.com" | secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:5:5:5:27 | x.index ... e.net") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:5:15:5:26 | "secure.net" | secure.net |
+| tst-IncompleteUrlSubstringSanitization.js:6:5:6:28 | x.index ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:6:15:6:27 | ".secure.com" | .secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:10:5:10:27 | x.index ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:10:15:10:26 | "secure.com" | secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:11:5:11:27 | x.index ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:11:15:11:26 | "secure.com" | secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:12:5:12:27 | x.index ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:12:15:12:26 | "secure.com" | secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:14:5:14:38 | x.start ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:14:18:14:37 | "https://secure.com" | https://secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:15:5:15:28 | x.endsW ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:15:16:15:27 | "secure.com" | secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:20:5:20:28 | x.inclu ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:20:16:20:27 | "secure.com" | secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:32:5:32:35 | x.index ... e.com") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:32:15:32:34 | "https://secure.com" | https://secure.com |
+| tst-IncompleteUrlSubstringSanitization.js:33:5:33:39 | x.index ... m:443") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:33:15:33:38 | "https: ... om:443" | https://secure.com:443 |
+| tst-IncompleteUrlSubstringSanitization.js:34:5:34:36 | x.index ... .com/") | '$@' may be at an arbitrary position in the sanitized URL. | tst-IncompleteUrlSubstringSanitization.js:34:15:34:35 | "https: ... e.com/" | https://secure.com/ |
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.qlref
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSubstringSanitization.qlref
@@ -0,0 +1 @@
+Security/CWE-020/IncompleteUrlSubstringSanitization.ql
--- a/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteUrlSubstringSanitization.js
+++ b/javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteUrlSubstringSanitization.js
@@ -0,0 +1,51 @@
+(function(x){
+    x.indexOf("internal"); // NOT OK, but not flagged
+    x.indexOf("localhost"); // NOT OK, but not flagged
+    x.indexOf("secure.com"); // NOT OK
+    x.indexOf("secure.net"); // NOT OK
+    x.indexOf(".secure.com"); // NOT OK
+    x.indexOf("sub.secure."); // NOT OK, but not flagged
+    x.indexOf(".sub.secure."); // NOT OK, but not flagged
+
+    x.indexOf("secure.com") === -1; // NOT OK
+    x.indexOf("secure.com") === 0; // NOT OK
+    x.indexOf("secure.com") >= 0; // NOT OK
+
+    x.startsWith("https://secure.com"); // NOT OK
+    x.endsWith("secure.com"); // NOT OK
+    x.endsWith(".secure.com"); // OK
+    x.startsWith("secure.com/"); // OK
+    x.indexOf("secure.com/") === 0; // OK
+
+    x.includes("secure.com"); // NOT OK
+
+    x.indexOf("#"); // OK
+    x.indexOf(":"); // OK
+    x.indexOf(":/"); // OK
+    x.indexOf("://"); // OK
+    x.indexOf("//"); // OK
+    x.indexOf(":443"); // OK
+    x.indexOf("/some/path/"); // OK
+    x.indexOf("some/path"); // OK
+    x.indexOf("/index.html"); // OK
+    x.indexOf(":template:"); // OK
+    x.indexOf("https://secure.com"); // NOT OK
+    x.indexOf("https://secure.com:443"); // NOT OK
+    x.indexOf("https://secure.com/"); // NOT OK
+
+    x.indexOf(".cn"); // NOT OK, but not flagged
+    x.indexOf(".jpg"); // OK
+    x.indexOf("index.html"); // OK
+    x.indexOf("index.js"); // OK
+    x.indexOf("index.php"); // OK
+    x.indexOf("index.css"); // OK
+
+    x.indexOf("secure=true"); // OK (query param)
+    x.indexOf("&auth="); // OK (query param)
+
+    x.indexOf(getCurrentDomain()); // NOT OK, but not flagged
+    x.indexOf(location.origin); // NOT OK, but not flagged
+
+    x.indexOf("tar.gz") + offset // OK
+    x.indexOf("tar.gz") - offset // OK
+});
				`@@ -0,0 +1 @@`
				`Security/CWE-020/IncompleteUrlSubstringSanitization.ql`