Merge pull request #2310 from max-schaefer/js/insufficient-url-scheme-check

JavaScript: Add query `IncompleteUrlSchemeCheck`
2025-12-16 16:53:25 +01:00 · 2019-11-14 22:13:02 +01:00
parent 0638907825 3b1e6c362c
commit 2ea7d141c8
10 changed files with 131 additions and 4 deletions
--- a/change-notes/1.23/analysis-javascript.md
+++ b/change-notes/1.23/analysis-javascript.md
@@ -21,14 +21,15 @@

 | **Query**                                                                 | **Tags**                                                          | **Purpose**                                                                                                                                                                            |
 |---------------------------------------------------------------------------|-------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| Unused index variable (`js/unused-index-variable`)                        | correctness                                                       | Highlights loops that iterate over an array, but do not use the index variable to access array elements, indicating a possible typo or logic error. Results are shown on LGTM by default. |
+| Ignoring result from pure array method (`js/ignore-array-result`)         | maintainability, correctness                                      | Highlights calls to array methods without side effects where the return value is ignored. Results are shown on LGTM by default. |
+| Incomplete URL scheme check (`js/incomplete-url-scheme-check`)            | security, correctness, external/cwe/cwe-020                       | Highlights checks for `javascript:` URLs that do not take `data:` or `vbscript:` URLs into account. Results are shown on LGTM by default. |
 | Loop bound injection (`js/loop-bound-injection`)                          | security, external/cwe/cwe-834                                      | Highlights loops where a user-controlled object with an arbitrary .length value can trick the server to loop indefinitely. Results are shown on LGTM by default. |
-| Suspicious method name (`js/suspicious-method-name-declaration`)          | correctness, typescript, methods                                  | Highlights suspiciously named methods where the developer likely meant to write a constructor or function. Results are shown on LGTM by default. |
 | Shell command built from environment values (`js/shell-command-injection-from-environment`) | correctness, security, external/cwe/cwe-078, external/cwe/cwe-088 | Highlights shell commands that may change behavior inadvertently depending on the execution environment, indicating a possible violation of [CWE-78](https://cwe.mitre.org/data/definitions/78.html). Results are shown on LGTM by default.|
+| Suspicious method name (`js/suspicious-method-name-declaration`)          | correctness, typescript, methods                                  | Highlights suspiciously named methods where the developer likely meant to write a constructor or function. Results are shown on LGTM by default. |
+| Unreachable method overloads (`js/unreachable-method-overloads`)          | correctness, typescript                                           | Highlights method overloads that are impossible to use from client code. Results are shown on LGTM by default. |
+| Unused index variable (`js/unused-index-variable`)                        | correctness                                                       | Highlights loops that iterate over an array, but do not use the index variable to access array elements, indicating a possible typo or logic error. Results are shown on LGTM by default. |
 | Use of returnless function (`js/use-of-returnless-function`)              | maintainability, correctness                                      | Highlights calls where the return value is used, but the callee never returns a value. Results are shown on LGTM by default. |
 | Useless regular expression character escape (`js/useless-regexp-character-escape`) | correctness, security, external/cwe/cwe-20 | Highlights regular expression strings with useless character escapes, indicating a possible violation of [CWE-20](https://cwe.mitre.org/data/definitions/20.html). Results are shown on LGTM by default. |
-| Unreachable method overloads (`js/unreachable-method-overloads`)          | correctness, typescript                                           | Highlights method overloads that are impossible to use from client code. Results are shown on LGTM by default. |
-| Ignoring result from pure array method (`js/ignore-array-result`)         | maintainability, correctness                                      | Highlights calls to array methods without side effects where the return value is ignored. Results are shown on LGTM by default. |

 ## Changes to existing queries

--- a/javascript/config/suites/javascript/security
+++ b/javascript/config/suites/javascript/security
@@ -1,6 +1,7 @@
 + semmlecode-javascript-queries/DOM/TargetBlank.ql: /Security/CWE/CWE-200
 + semmlecode-javascript-queries/Electron/EnablingNodeIntegration.ql: /Security/CWE/CWE-094
 + semmlecode-javascript-queries/Security/CWE-020/IncompleteHostnameRegExp.ql: /Security/CWE/CWE-020
+ semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSchemeCheck.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020
 + semmlecode-javascript-queries/Security/CWE-020/MissingRegExpAnchor.ql: /Security/CWE/CWE-020
--- a/javascript/ql/src/Security/CWE-020/IncompleteUrlSchemeCheck.qhelp
+++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlSchemeCheck.qhelp
@@ -0,0 +1,42 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+<overview>
+<p>
+URLs starting with <code>javascript:</code> can be used to encode JavaScript code to be executed
+when the URL is visited. While this is a powerful mechanism for creating feature-rich and responsive
+web applications, it is also a potential security risk: if the URL comes from an untrusted source,
+it might contain harmful JavaScript code. For this reason, many frameworks and libraries first check
+the URL scheme of any untrusted URL, and reject URLs with the <code>javascript:</code> scheme.
+</p>
+<p>
+However, the <code>data:</code> and <code>vbscript:</code> schemes can be used to represent
+executable code in a very similar way, so any validation logic that checks against
+<code>javascript:</code>, but not against <code>data:</code> and <code>vbscript:</code>, is likely to
+be insufficient.
+</p>
+</overview>
+<recommendation>
+<p>
+Add checks covering both <code>data:</code> and <code>vbscript:</code>.
+</p>
+</recommendation>
+<example>
+<p>
+The following function validates a (presumably untrusted) URL <code>url</code>. If it starts with
+<code>javascript:</code> (case-insensitive and potentially preceded by whitespace), the harmless
+placeholder URL <code>about:blank</code> is returned to prevent code injection; otherwise
+<code>url</code> itself is returned.
+</p>
+<sample src="examples/IncompleteUrlSchemeCheck.js"/>
+<p>
+While this check provides partial protection, it should be extended to cover <code>data:</code>
+and <code>vbscript:</code> as well:
+</p>
+<sample src="examples/IncompleteUrlSchemeCheckGood.js"/>
+</example>
+<references>
+<li>WHATWG: <a href="https://wiki.whatwg.org/wiki/URL_schemes">URL schemes</a>.</li>
+</references>
+</qhelp>
--- a/javascript/ql/src/Security/CWE-020/IncompleteUrlSchemeCheck.ql
+++ b/javascript/ql/src/Security/CWE-020/IncompleteUrlSchemeCheck.ql
@@ -0,0 +1,57 @@
+/**
+ * @name Incomplete URL scheme check
+ * @description Checking for the "javascript:" URL scheme without also checking for "vbscript:"
+ *              and "data:" suggests a logic error or even a security vulnerability.
+ * @kind problem
+ * @problem.severity warning
+ * @precision high
+ * @id js/incomplete-url-scheme-check
+ * @tags security
+ *       correctness
+ *       external/cwe/cwe-020
+ */
+
+import javascript
+import semmle.javascript.dataflow.internal.AccessPaths
+
+/** A URL scheme, written with its trailing colon, that can be used to represent executable code. */
+class DangerousScheme extends string {
+  DangerousScheme() { this = "data:" or this = "javascript:" or this = "vbscript:" }
+}
+
+/** Gets a data-flow node that checks `nd`, or a string derived from it, against the given `scheme`. */
+DataFlow::Node schemeCheck(
+  DataFlow::Node nd, DangerousScheme scheme
+) {
+  // base case: `result` is a starts-with check whose base string is `nd` and whose substring may be `scheme`
+  exists(StringOps::StartsWith sw | sw = result |
+    sw.getBaseString() = nd and
+    sw.getSubstring().mayHaveStringValue(scheme)
+  )
+  or
+  // step from receiver `nd` through a call to a method named `trim*`, `to*Case` or `replace`
+  exists(DataFlow::MethodCallNode stringop |
+    stringop.getMethodName().matches("trim%") or
+    stringop.getMethodName().matches("to%Case") or
+    stringop.getMethodName() = "replace"
+  |
+    result = schemeCheck(stringop, scheme) and
+    nd = stringop.getReceiver()
+  )
+  or
+  // propagate through local data flow: a check on a successor of `nd` counts as a check on `nd`
+  result = schemeCheck(nd.getASuccessor(), scheme)
+}
+
+/** Gets a node that, per `schemeCheck`, checks the flow node of some instance of `ap` against `scheme`. */
+DataFlow::Node schemeCheckOn(AccessPath ap, DangerousScheme scheme) {
+  result = schemeCheck(ap.getAnInstance().flow(), scheme)
+}
+
+from AccessPath ap, int n
+where
+  n = strictcount(DangerousScheme s) and // total number of dangerous schemes
+  strictcount(DangerousScheme s | exists(schemeCheckOn(ap, s))) < n // `ap` is checked against some, but not all, schemes
+select schemeCheckOn(ap, "javascript:"), // only checks against `javascript:` are reported
+  "This check does not consider " +
+    strictconcat(DangerousScheme s | not exists(schemeCheckOn(ap, s)) | s, " and ") + "." // names the unchecked schemes
--- a/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSchemeCheck.js
+++ b/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSchemeCheck.js
@@ -0,0 +1,6 @@
+function sanitizeUrl(url) { // returns "about:blank" for `javascript:` URLs, otherwise `url` itself
+    let u = decodeURI(url).trim().toLowerCase(); // normalized copy used only for the scheme check
+    if (u.startsWith("javascript:")) // incomplete: `data:` and `vbscript:` are not checked
+        return "about:blank";
+    return url; // the original, un-normalized URL is returned
+}
--- a/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSchemeCheckGood.js
+++ b/javascript/ql/src/Security/CWE-020/examples/IncompleteUrlSchemeCheckGood.js
@@ -0,0 +1,6 @@
+function sanitizeUrl(url) { // returns "about:blank" for `javascript:`, `data:` and `vbscript:` URLs, otherwise `url` itself
+    let u = decodeURI(url).trim().toLowerCase(); // normalized copy used only for the scheme check
+    if (u.startsWith("javascript:") || u.startsWith("data:") || u.startsWith("vbscript:")) // all three schemes are checked
+        return "about:blank";
+    return url; // the original, un-normalized URL is returned
+}
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSchemeCheck.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSchemeCheck.expected
@@ -0,0 +1 @@
+| IncompleteUrlSchemeCheck.js:3:9:3:35 | u.start ... ript:") | This check does not consider data: and vbscript:. |
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSchemeCheck.js
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSchemeCheck.js
@@ -0,0 +1,6 @@
+function sanitizeUrl(url) { // test fixture: scheme check that covers `javascript:` only
+    let u = decodeURI(url).trim().toLowerCase(); // normalized copy used only for the scheme check
+    if (u.startsWith("javascript:")) // NOT OK: `data:` and `vbscript:` are not checked
+        return "about:blank";
+    return url; // the original, un-normalized URL is returned
+}
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSchemeCheck.qlref
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSchemeCheck.qlref
@@ -0,0 +1 @@
+Security/CWE-020/IncompleteUrlSchemeCheck.ql
--- a/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSchemeCheckGood.js
+++ b/javascript/ql/test/query-tests/Security/CWE-020/IncompleteUrlSchemeCheckGood.js
@@ -0,0 +1,6 @@
+function sanitizeUrl(url) { // test fixture: scheme check that covers all three schemes
+    let u = decodeURI(url).trim().toLowerCase(); // normalized copy used only for the scheme check
+    if (u.startsWith("javascript:") || u.startsWith("data:") || u.startsWith("vbscript:")) // OK: all three schemes are checked
+        return "about:blank";
+    return url; // the original, un-normalized URL is returned
+}
				`@@ -0,0 +1 @@`
				`\| IncompleteUrlSchemeCheck.js:3:9:3:35 \| u.start ... ript:") \| This check does not consider data: and vbscript:. \|`
				`@@ -0,0 +1 @@`
				`Security/CWE-020/IncompleteUrlSchemeCheck.ql`