Merge branch 'main' into explicit-this

2026-07-21 03:08:25 +02:00 · 2021-11-24 15:24:58 +01:00
parent f0c5a80d1a 3bab8c6d1d
commit 08ce03cd93
455 changed files with 22630 additions and 4220 deletions
--- a/javascript/ql/src/Expressions/UnknownDirective.ql
+++ b/javascript/ql/src/Expressions/UnknownDirective.ql
@@ -18,5 +18,5 @@ where
  // but exclude attribute top-levels: `<a href="javascript:'some-attribute-string'">`
  not d.getParent() instanceof CodeInAttribute and
  // exclude babel generated directives like "@babel/helpers - typeof".
-  not d.getDirectiveText().prefix(14) = "@babel/helpers"
+  not d.getDirectiveText().matches("@babel/helpers%")
 select d, "Unknown directive: '" + truncate(d.getDirectiveText(), 20, " ... (truncated)") + "'."
--- a/javascript/ql/src/Security/CWE-116/BadTagFilter.qhelp
+++ b/javascript/ql/src/Security/CWE-116/BadTagFilter.qhelp
@@ -0,0 +1,54 @@
+<!DOCTYPE qhelp PUBLIC
+  "-//Semmle//qhelp//EN"
+  "qhelp.dtd">
+<qhelp>
+
+<overview>
+<p>
+It is possible to match some single HTML tags using regular expressions (parsing general HTML using 
+regular expressions is impossible). However, if the regular expression is not written well it might 
+be possible to circumvent it, which can lead to cross-site scripting or other security issues.
+</p>
+<p>
+Some of these mistakes are caused by browsers having very forgiving HTML parsers, which
+will often render invalid HTML containing syntax errors. 
+Regular expressions that attempt to match HTML should also recognize tags containing such syntax errors.
+</p>
+</overview>
+
+<recommendation>
+<p>
+Use a well-tested sanitization or parser library if at all possible. These libraries are much more
+likely to handle corner cases correctly than a custom implementation.
+</p>
+</recommendation>
+
+<example>
+<p>
+The following example attempts to filter out all <code>&lt;script&gt;</code> tags.
+</p>
+
+<sample src="examples/BadTagFilter.js" />
+
+<p>
+The above sanitizer does not filter out all <code>&lt;script&gt;</code> tags. 
+Browsers will not only accept <code>&lt;/script&gt;</code> as script end tags, but also tags such as <code>&lt;/script foo="bar"&gt;</code> even though it is a parser error.
+This means that an attack string such as <code>&lt;script&gt;alert(1)&lt;/script foo="bar"&gt;</code> will not be filtered by 
+the function, and <code>alert(1)</code> will be executed by a browser if the string is rendered as HTML.
+</p>
+
+<p>
+Other corner cases include that HTML comments can end with <code>--!&gt;</code>, 
+and that HTML tag names can contain upper case characters.
+</p>
+</example>
+
+<references>
+<li>Securitum: <a href="https://research.securitum.com/the-curious-case-of-copy-paste/">The Curious Case of Copy &amp; Paste</a>.</li>
+<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags#answer-1732454">You can't parse [X]HTML with regex</a>.</li>
+<li>HTML Standard: <a href="https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state">Comment end bang state</a>.</li>
+<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/25559999/why-arent-browsers-strict-about-html">Why aren't browsers strict about HTML?</a>.</li>
+</references>
+</qhelp>
+
+
--- a/javascript/ql/src/Security/CWE-116/BadTagFilter.ql
+++ b/javascript/ql/src/Security/CWE-116/BadTagFilter.ql
@@ -0,0 +1,19 @@
+/**
+ * @name Bad HTML filtering regexp
+ * @description Matching HTML tags using regular expressions is hard to do right, and can easily lead to security issues.
+ * @kind problem
+ * @problem.severity warning
+ * @security-severity 7.8
+ * @precision high
+ * @id js/bad-tag-filter
+ * @tags correctness
+ *       security
+ *       external/cwe/cwe-116
+ *       external/cwe/cwe-020
+ */
+
+import semmle.javascript.security.BadTagFilterQuery
+
+from HTMLMatchingRegExp regexp, string msg
+where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // several messages may apply to one regexp; report only the shortest (ties broken by string order)
+select regexp, msg
--- a/javascript/ql/src/Security/CWE-116/examples/BadTagFilter.js
+++ b/javascript/ql/src/Security/CWE-116/examples/BadTagFilter.js
@@ -0,0 +1,8 @@
+function filterScript(html) { // Unwraps <script>...</script> pairs in `html`, keeping their inner text; known-incomplete sample for the js/bad-tag-filter help page.
+    var scriptRegex = /<script\b[^>]*>([\s\S]*?)<\/script>/gi; // End tag must be exactly `</script>` (any letter case), so `</script foo="bar">` is not matched.
+    var match;
+    while ((match = scriptRegex.exec(html)) !== null) { // With the /g flag, each exec() call resumes from scriptRegex.lastIndex.
+        html = html.replace(match[0], match[1]); // String pattern: only the first occurrence of the matched text is swapped for capture group 1.
+    }
+    return html;
+}