Moved new query to 'experimental'

Moved lists of domains to data extensions, including adding those to the overall qlpack.yml. Expanded scope of new query to further domains operated by the untrusted owners of polyfill.io.
2026-04-27 01:35:13 +02:00 · 2024-07-09 16:38:01 +01:00
parent 1fe14e26b1
commit 86afd54a9b
22 changed files with 204 additions and 136 deletions
--- a/javascript/ql/lib/qlpack.yml
+++ b/javascript/ql/lib/qlpack.yml
@@ -16,4 +16,5 @@ dependencies:
 dataExtensions:
  - semmle/javascript/frameworks/**/model.yml
  - semmle/javascript/frameworks/**/*.model.yml
+  - semmle/javascript/security/domains/**/*.model.yml
 warnOnImplicitThis: true
--- a/javascript/ql/lib/semmle/javascript/security/FunctionalityFromUntrustedSource.qll
+++ b/javascript/ql/lib/semmle/javascript/security/FunctionalityFromUntrustedSource.qll
@@ -37,22 +37,10 @@ module StaticCreation {
  predicate isCdnUrlWithCheckingRequired(string url) {
    // Some CDN URLs are required to have an integrity attribute. We only add CDNs to that list
    // that recommend integrity-checking.
-    url.regexpMatch("(?i)^https?://" +
-        [
-          "code\\.jquery\\.com", //
-          "cdnjs\\.cloudflare\\.com", //
-          "cdnjs\\.com", //
-        ] + "/.*\\.js$")
-  }
-
-  /** Holds if `url` refers to a compromised CDN, that should not be trusted. */
-  bindingset[url]
-  predicate isCompromisedCdn(string url) {
-    url.regexpMatch("(?i)^https?://" +
-        [
-          "cdn\\.polyfill\\.io", // See https://sansec.io/research/polyfill-supply-chain-attack for details
-          "polyfill\\.io",       // "
-        ] + "/.*$")
+    exists(string hostname, string requiredCheckingHostname |
+      hostname = url.regexpCapture("(?i)^(?:https?:)?//([^/]+)/.*\\.js$", 1)
+      and isCdnDomainWithCheckingRequired(requiredCheckingHostname) and hostname = requiredCheckingHostname
+    )
  }

  /** A script element that refers to untrusted content. */
@@ -67,24 +55,15 @@ module StaticCreation {
    override string getProblem() { result = "Script loaded using unencrypted connection." }
  }

-  /** A script element that refers to compromised content. */
-  class CdnFromCompromisedSource extends AddsUntrustedUrl, HTML::ScriptElement {
-    CdnFromCompromisedSource() {
-      isCompromisedCdn(this.getSourcePath())
-    }
-
-    override string getUrl() { result = this.getSourcePath() }
-
-    override string getProblem() {
-      result = "Script loaded from compromised content delivery network."
-    }
-  }
-
  /** A script element that refers to untrusted content. */
  class CdnScriptElementWithUntrustedContent extends AddsUntrustedUrl, HTML::ScriptElement {
    CdnScriptElementWithUntrustedContent() {
      not exists(string digest | not digest = "" | this.getIntegrityDigest() = digest) and
-      isCdnUrlWithCheckingRequired(this.getSourcePath())
+      (
+        isCdnUrlWithCheckingRequired(this.getSourcePath())
+        or
+        isUrlWithUntrustedDomain(super.getSourcePath())
+      )
    }

    override string getUrl() { result = this.getSourcePath() }
@@ -104,6 +83,29 @@ module StaticCreation {
  }
 }

+/** Holds if `url` is an `http(s)://` or protocol-relative (`//`) URL whose host part is an untrusted domain or a subdomain of one. */
+bindingset[url]
+predicate isUrlWithUntrustedDomain(string url) {
+  exists(string hostname |
+    // Host names are case-insensitive; lower-case the capture so it can equal the lower-case model entries.
+    hostname = url.regexpCapture("(?i)^(?:https?:)?//([^/]+)/.*", 1).toLowerCase() and isUntrustedHostname(hostname)
+  )
+}
+
+/** Holds if `hostname` equals, or ends with `.` followed by, some `domain` for which `isUntrustedDomain` holds. */
+bindingset[hostname]
+predicate isUntrustedHostname(string hostname) {
+  exists(string domain |
+    (hostname = domain or hostname.matches("%." + domain)) and 
+    isUntrustedDomain(domain)
+  )
+}
+
+// The following predicates are extended in data extensions under javascript/ql/lib/semmle/javascript/security/domains/
+// and can be extended with custom model packs as necessary.
+extensible predicate isCdnDomainWithCheckingRequired(string hostname);
+extensible predicate isUntrustedDomain(string domain);
+
 /** Looks for dyanmic creation of an element and source. */
 module DynamicCreation {
  /** Holds if `call` creates a tag of kind `name`. */
--- a/javascript/ql/lib/semmle/javascript/security/domains/IntegrityCheckingRequired/integrity_checking_required.model.yml
+++ b/javascript/ql/lib/semmle/javascript/security/domains/IntegrityCheckingRequired/integrity_checking_required.model.yml
@@ -0,0 +1,8 @@
+extensions:
+  - addsTo: 
+      pack: codeql/javascript-all
+      extensible: isCdnDomainWithCheckingRequired
+    data:
+      - ["code.jquery.com"]
+      - ["cdnjs.cloudflare.com"]
+      - ["cdnjs.com"]
--- a/javascript/ql/lib/semmle/javascript/security/domains/compromised/compromised_domains.model.yml
+++ b/javascript/ql/lib/semmle/javascript/security/domains/compromised/compromised_domains.model.yml
@@ -0,0 +1,6 @@
+extensions:
+  - addsTo: 
+      pack: codeql/javascript-all
+      extensible: isUntrustedDomain
+    data:
+      - ["polyfill.io"]
--- a/javascript/ql/lib/semmle/javascript/security/domains/untrusted/untrusted_domains.model.yml
+++ b/javascript/ql/lib/semmle/javascript/security/domains/untrusted/untrusted_domains.model.yml
@@ -0,0 +1,14 @@
+extensions:
+  - addsTo: 
+      pack: codeql/javascript-all
+      extensible: isUntrustedDomain
+    data:
+      # new location of the polyfill.io CDN, which was used to serve malware. See: https://www.cside.dev/blog/the-polyfill-attack-explained
+      - ["polyfill.com"]
+      - ["polyfillcache.com"]
+
+      # domains operated by the same owner as polyfill.io, which was used to serve malware. See: https://www.cside.dev/blog/the-polyfill-attack-explained
+      - ["bootcdn.net"]
+      - ["bootcss.com"]
+      - ["staticfile.net"]
+      - ["staticfile.org"]
--- a/javascript/ql/src/Security/CWE-830/FunctionalityFromUntrustedSource.ql
+++ b/javascript/ql/src/Security/CWE-830/FunctionalityFromUntrustedSource.ql
@@ -15,4 +15,7 @@ import javascript
 import semmle.javascript.security.FunctionalityFromUntrustedSource

 from AddsUntrustedUrl s
+// do not alert on explicitly untrusted domains
+// another query can alert on these, js/functionality-from-untrusted-domain
+where not isUrlWithUntrustedDomain(s.getUrl())
 select s, s.getProblem()
--- a/javascript/ql/src/Security/CWE-830/PolyfillIOCompromisedScript.qhelp
+++ b/javascript/ql/src/Security/CWE-830/PolyfillIOCompromisedScript.qhelp
@@ -1,77 +0,0 @@
-<!DOCTYPE qhelp PUBLIC
-"-//Semmle//qhelp//EN"
-"qhelp.dtd">
-<qhelp>
-	<overview>
-		<p>
-		    Polyfill.io was a popular JavaScript polyflll Content Delivery Network (CDN),
-			used to support new web browser standards on older browsers.
-		</p>
-
-		<p>
-		    In February 2024 the domain was sold, and in June 2024 it was widely publicised that the domain
-		    had been used to serve malicious scripts. It was taken down later in that month, leaving a window
-			where sites that used the service could have been compromised.
-		</p>
-
-		<p>
-			Including a resource from an untrusted source or using an untrusted channel may
-			allow an attacker to include arbitrary code in the response.
-			When including an external resource (for example, a <code>script</code> element or an
-			<code>iframe</code> element) on a page, it is important to ensure that the received
-			data is not malicious.
-		</p>
-
-		<p>
-			Even when <code>https</code> is used, an attacker might still compromise the server.
-			When you use a <code>script</code> element, you should check for subresource integrity -
-			that is, you can check the contents of the data received by supplying a cryptographic
-			digest of the expected sources to the <code>script</code> element. The script will only
-			load sources that match the digest and an attacker will be unable to modify the script
-			even when the server is compromised.
-		</p>
-
-		<p>
-			Subresource integrity checking is commonly recommended when importing a fixed version of
-			a library - for example, from a CDN (content-delivery network). Then, the fixed digest
-			of that version of the library can easily be added to the <code>script</code> element's
-			<code>integrity</code> attribute.
-		</p>
-	</overview>
-
-	<recommendation>
-		<p>
-			To help mitigate the risk of including a compromised script, consider whether you need to
-			use a polyfill at all, can use a different polyfill CDN service,
-			or could host an uncompromised version of the polyfill yourself.
-		</p>
-
-		<p>
-			When using a <code>script</code> element to load a script, it is important to use an
-			<code>https</code> URL and to check subresource integrity.
-		</p>
-	</recommendation>
-
-	<example>
-		<p>
-			The following example loads the Polyfill.io library from the <code>polyfill.io</code> CDN without 
-			checking subresource integrity. This use is open to malicious scripts being served by the CDN.
-		</p>
-
-		<sample src="polyfill-nocheck.html" />
-
-		<p>
-			Instead, loading the Polyfill library from a trusted CDN, and checking
-			subresource integrity is recommended, as in the next example.
-		</p>
-
-		<sample src="polyfill-check.html" />
-	</example>
-
-	<references>
-		<li>Sansec: <a href="https://sansec.io/research/polyfill-supply-chain-attack">Polyfill supply chain attack hits 100K+ sites</a></li>
-		<li>Cloudflare: <a href="https://cdnjs.cloudflare.com/polyfill">Upgrade the web. Automatically. Delivers only the polyfills required by the user's web browser.</a></li>
-		<li>Fastly: <a href="https://community.fastly.com/t/new-options-for-polyfill-io-users/2540">New options for Polyfill.io users</a></li>
-		<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Polyfill_(programming)">Polyfill (programming)</a></li>
-	</references>
-</qhelp>
--- a/javascript/ql/src/Security/CWE-830/PolyfillIOCompromisedScript.ql
+++ b/javascript/ql/src/Security/CWE-830/PolyfillIOCompromisedScript.ql
@@ -1,18 +0,0 @@
-/**
- * @name Polyfill.io script use
- * @description Use of script from compromised domain Polyfill.io (https://sansec.io/research/polyfill-supply-chain-attack)
- * @kind problem
- * @security-severity 7.2
- * @problem.severity error
- * @id js/polyfill-io-compromised-script
- * @precision high
- * @tags security
- *       external/cwe/cwe-830
- */
-
-import javascript
-import semmle.javascript.security.FunctionalityFromUntrustedSource
-
-from AddsUntrustedUrl s
-where s.getUrl().regexpMatch("^(?i)https?://(cdn\\.)?polyfill\\.io/.*")
-select s, "Script loaded from known-compromised content delivery network with no integrity check."
--- a/javascript/ql/src/change-notes/2024-07-01-polyfill-io-compromised-script.md
+++ b/javascript/ql/src/change-notes/2024-07-01-polyfill-io-compromised-script.md
@@ -1,6 +0,0 @@
---
-category: minorAnalysis
---
-* Added a new query, `js/polyfill-io-compromised-script`, which detects uses in HTML and JavaScript of the compromised `polyfill.io` content delivery network.
-* Modified existing query, `js/functionality-from-untrusted-source`, to add a new check for the compromised `polyfill.io` content delivery network.
-* Created a shared library, `semmle.javascript.security.FunctionalityFromUntrustedSource`, to separate the logic from the existing query and allow having a separate new Polyfill-specific query.
--- a/javascript/ql/src/change-notes/2024-07-08-functionality-from-untrusted-domain.md
+++ b/javascript/ql/src/change-notes/2024-07-08-functionality-from-untrusted-domain.md
@@ -0,0 +1,6 @@
+---
+category: minorAnalysis
+---
+* Added a new query, `js/functionality-from-untrusted-domain`, which detects uses in HTML and JavaScript of scripts from untrusted domains, including the compromised `polyfill.io` content delivery network, and can be extended to detect other compromised scripts using data extensions.
+* Modified existing query, `js/functionality-from-untrusted-source`, to allow adding this new query, but reusing the same logic.
+* Created a shared library, `semmle.javascript.security.FunctionalityFromUntrustedSource`, to separate the logic from that existing query and allow having a separate "untrusted domain" query.
--- a/javascript/ql/src/experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.qhelp
+++ b/javascript/ql/src/experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.qhelp
@@ -0,0 +1,98 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+	<overview>
+		<p>
+			Content Delivery Networks (CDNs) are used to deliver content to users quickly and efficiently.
+
+			However, they can change hands or be operated by untrustworthy owners, risking the security of the sites that use them.
+
+			Some CDN domains are operated by entities that have used CDNs to deliver malware, which this query identifies.
+		</p>
+
+		<p>
+		    For example, <code>polyfill.io</code> was a popular JavaScript CDN,
+			used to support new web browser standards on older browsers.
+
+		    In February 2024 the domain was sold, and in June 2024 it was publicised that the domain
+		    had been used to serve malicious scripts. It was taken down later in that month, leaving a window
+			where sites that used the service could have been compromised.
+
+			The same operator runs several other CDNs, undermining trust in those too.
+		</p>
+
+		<p>
+			Including a resource from an untrusted source or using an untrusted channel may
+			allow an attacker to include arbitrary code in the response.
+			When including an external resource (for example, a <code>script</code> element) on a page,
+			it is important to ensure that the received data is not malicious.
+		</p>
+
+		<p>
+			Even when <code>https</code> is used, an untrustworthy operator might deliver malware.
+		</p>
+	</overview>
+
+	<recommendation>
+		<p>
+			Carefully research the ownership of a Content Delivery Network (CDN) before using it in your application.
+		</p>
+
+		<p>
+			If you find code that originated from an untrusted domain in your application, you should review your logs to check for compromise.
+		</p>
+
+		<p>
+			To help mitigate future risk of including a script that could be compromised, consider whether you need to
+			use a polyfill or other library at all. Modern browsers do not require a polyfill, and other popular libraries are redundant after enhancements to HTML 5.
+		</p>
+
+		<p>
+			If you do need a polyfill service or library, move to using a trusted CDN.
+		</p>
+		
+		<p>
+			When you use a <code>script</code> or <code>link</code> element,
+			you should check for <a href="https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity">subresource integrity (SRI)</a>,
+			and pin to a hash of a version of the service that you can trust (for example, because you have audited it for security and unwanted features).
+		    
+			A dynamic service cannot be easily used with SRI. Nevertheless,
+			it is possible to list multiple acceptable SHA hashes in the <code>integrity</code> attribute,
+			such as those for the content generated for major browsers used by your users.
+		</p>
+
+		<p>
+			You can also choose to self-host an uncompromised version of the service or library.
+		</p>
+	</recommendation>
+
+	<example>
+		<p>
+			The following example loads the Polyfill.io library from the <code>polyfill.io</code> CDN. This use was open to malicious scripts being served by the CDN.
+		</p>
+
+		<sample src="polyfill-compromised.html" />
+
+		<p>
+			Instead, load the Polyfill library from a trusted CDN, as in the next example:
+		</p>
+
+		<sample src="polyfill-trusted.html" />
+
+		<p>
+			If you can investigate the most used browsers by your users, you can list the hashes of the polyfills for those browsers:
+		</p>
+
+		<sample src="polyfill-sri.html" />
+
+	</example>
+
+	<references>
+		<li>Sansec: <a href="https://sansec.io/research/polyfill-supply-chain-attack">Polyfill supply chain attack hits 100K+ sites</a></li>
+		<li>Cloudflare: <a href="https://cdnjs.cloudflare.com/polyfill">Upgrade the web. Automatically. Delivers only the polyfills required by the user's web browser.</a></li>
+		<li>Fastly: <a href="https://community.fastly.com/t/new-options-for-polyfill-io-users/2540">New options for Polyfill.io users</a></li>
+		<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Polyfill_(programming)">Polyfill (programming)</a></li>
+		<li>MDN Web Docs: <a href="https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity">Subresource Integrity</a></li>
+	</references>
+</qhelp>
--- a/javascript/ql/src/experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.ql
+++ b/javascript/ql/src/experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.ql
@@ -0,0 +1,18 @@
+/**
+ * @name Untrusted domain used in script or other content
+ * @description Use of a script or other content from an untrusted or compromised domain
+ * @kind problem
+ * @security-severity 7.2
+ * @problem.severity error
+ * @id js/functionality-from-untrusted-domain
+ * @precision high
+ * @tags security
+ *       external/cwe/cwe-830
+ */
+
+import javascript
+import semmle.javascript.security.FunctionalityFromUntrustedSource
+
+from AddsUntrustedUrl s
+where isUrlWithUntrustedDomain(s.getUrl())
+select s, "Content loaded from untrusted domain with no integrity check."
--- a/javascript/ql/src/experimental/Security/CWE-830/polyfill-compromised.html
+++ b/javascript/ql/src/experimental/Security/CWE-830/polyfill-compromised.html
--- a/javascript/ql/src/experimental/Security/CWE-830/polyfill-sri.html
+++ b/javascript/ql/src/experimental/Security/CWE-830/polyfill-sri.html
@@ -0,0 +1,9 @@
+<html>
+    <head>
+        <title>Polyfill demo - Cloudflare hosted with pinned version (with integrity checking for a *very limited* browser set - just an example!)</title>
+        <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?version=4.8.0" integrity="sha384-i0IGVuZBkKZqwXTD4CH4kcksIbFx7WKFMdxN8zUhLFHpLdELF0ym0jxa6UvLhW8/ sha384-3d4jRKquKl90C9aFG+eH4lPJmtbPHgACWHrp+VomFOxF8lzx2jxqeYkhpRg18UWC" crossorigin="anonymous"></script>
+    </head>
+    <body>
+        ...
+    </body>
+</html>
--- a/javascript/ql/src/experimental/Security/CWE-830/polyfill-trusted.html
+++ b/javascript/ql/src/experimental/Security/CWE-830/polyfill-trusted.html
@@ -1,7 +1,7 @@
 <html>
    <head>
-        <title>Polyfill demo - Cloudflare hosted with pinned version and integrity checking</title>
-        <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?version=4.8.0" integrity="sha384-3d4jRKquKl90C9aFG+eH4lPJmtbPHgACWHrp+VomFOxF8lzx2jxqeYkhpRg18UWC" crossorigin="anonymous"></script>
+        <title>Polyfill demo - Cloudflare hosted with pinned version (but no integrity checking, since it is dynamically generated)</title>
+        <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?version=4.8.0" crossorigin="anonymous"></script>
    </head>
    <body>
        ...
--- a/javascript/ql/test/experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.expected
+++ b/javascript/ql/test/experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.expected
@@ -0,0 +1,6 @@
+WARNING: Unused predicate isCdnDomainWithCheckingRequiredTest (FunctionalityFromUntrustedDomain.ql:34,11-46)
+WARNING: Unused predicate isUntrustedDomainTest (FunctionalityFromUntrustedDomain.ql:26,11-32)
+WARNING: Unused predicate isUntrustedDomainTest2 (FunctionalityFromUntrustedDomain.ql:30,11-33)
+WARNING: Unused predicate isUntrustedHostnameTest (FunctionalityFromUntrustedDomain.ql:21,11-34)
+WARNING: Unused predicate isUntrustedTest (FunctionalityFromUntrustedDomain.ql:16,11-26)
+| polyfill-nocheck.html:4:9:4:98 | <script>...</> | Content loaded from untrusted domain with no integrity check. |
--- a/javascript/ql/test/experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.qlref
+++ b/javascript/ql/test/experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.ql
--- a/javascript/ql/test/experimental/Security/CWE-830/polyfill-check.html
+++ b/javascript/ql/test/experimental/Security/CWE-830/polyfill-check.html
--- a/javascript/ql/test/experimental/Security/CWE-830/polyfill-nocheck.html
+++ b/javascript/ql/test/experimental/Security/CWE-830/polyfill-nocheck.html
--- a/javascript/ql/test/query-tests/Security/CWE-830/FunctionalityFromUntrustedSource.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-830/FunctionalityFromUntrustedSource.expected
@@ -5,4 +5,3 @@
 | StaticCreationOfUntrustedSourceUse.html:6:9:6:56 | <script>...</> | Script loaded using unencrypted connection. |
 | StaticCreationOfUntrustedSourceUse.html:9:9:9:58 | <iframe>...</> | Iframe loaded using unencrypted connection. |
 | StaticCreationOfUntrustedSourceUse.html:21:9:21:155 | <script>...</> | Script loaded from content delivery network with no integrity check. |
-| polyfill-nocheck.html:4:9:4:98 | <script>...</> | Script loaded from content delivery network with no integrity check. |
--- a/javascript/ql/test/query-tests/Security/CWE-830/PolyfillIOCompromisedScript.expected
+++ b/javascript/ql/test/query-tests/Security/CWE-830/PolyfillIOCompromisedScript.expected
@@ -1 +0,0 @@
-| polyfill-nocheck.html:4:9:4:98 | <script>...</> | Script loaded from known-compromised content delivery network with no integrity check. |
--- a/javascript/ql/test/query-tests/Security/CWE-830/PolyfillIOCompromisedScript.qlref
+++ b/javascript/ql/test/query-tests/Security/CWE-830/PolyfillIOCompromisedScript.qlref
@@ -1 +0,0 @@
-Security/CWE-830/PolyfillIOCompromisedScript.ql
				`@@ -0,0 +1 @@`
				`experimental/Security/CWE-830/FunctionalityFromUntrustedDomain.ql`
				`@@ -1 +0,0 @@`
				`\| polyfill-nocheck.html:4:9:4:98 \| <script>...</> \| Script loaded from known-compromised content delivery network with no integrity check. \|`
				`@@ -1 +0,0 @@`
				`Security/CWE-830/PolyfillIOCompromisedScript.ql`