mirror of
https://github.com/github/codeql.git
synced 2026-04-24 08:15:14 +02:00
Merge pull request #6561 from erik-krogh/htmlReg
JS/Py/Ruby: add a bad-tag-filter query
This commit is contained in:
54
ruby/ql/src/queries/security/cwe-116/BadTagFilter.qhelp
Normal file
54
ruby/ql/src/queries/security/cwe-116/BadTagFilter.qhelp
Normal file
@@ -0,0 +1,54 @@
|
||||
<!DOCTYPE qhelp PUBLIC
|
||||
"-//Semmle//qhelp//EN"
|
||||
"qhelp.dtd">
|
||||
<qhelp>
|
||||
|
||||
<overview>
|
||||
<p>
|
||||
It is possible to match some single HTML tags using regular expressions (parsing general HTML using
|
||||
regular expressions is impossible). However, if the regular expression is not written well, it might
|
||||
be possible to circumvent it, which can lead to cross-site scripting or other security issues.
|
||||
</p>
|
||||
<p>
|
||||
Some of these mistakes are caused by browsers having very forgiving HTML parsers, which
|
||||
will often render invalid HTML containing syntax errors.
|
||||
Regular expressions that attempt to match HTML should also recognize tags containing such syntax errors.
|
||||
</p>
|
||||
</overview>
|
||||
|
||||
<recommendation>
|
||||
<p>
|
||||
Use a well-tested sanitization or parser library if at all possible. These libraries are much more
|
||||
likely to handle corner cases correctly than a custom implementation.
|
||||
</p>
|
||||
</recommendation>
|
||||
|
||||
<example>
|
||||
<p>
|
||||
The following example attempts to filter out all <code>&lt;script&gt;</code> tags.
|
||||
</p>
|
||||
|
||||
<sample src="examples/BadTagFilter.rb" />
|
||||
|
||||
<p>
|
||||
The above sanitizer does not filter out all <code>&lt;script&gt;</code> tags.
|
||||
Browsers accept not only <code>&lt;/script&gt;</code> as a script end tag, but also tags such as <code>&lt;/script foo="bar"&gt;</code>, even though such a tag is a parse error.
|
||||
This means that an attack string such as <code>&lt;script&gt;alert(1)&lt;/script foo="bar"&gt;</code> will not be filtered by
|
||||
the function, and <code>alert(1)</code> will be executed by a browser if the string is rendered as HTML.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Other corner cases include that HTML comments can end with <code>--!&gt;</code>,
|
||||
and that HTML tag names can contain upper case characters.
|
||||
</p>
|
||||
</example>
|
||||
|
||||
<references>
|
||||
<li>Securitum: <a href="https://research.securitum.com/the-curious-case-of-copy-paste/">The Curious Case of Copy &amp; Paste</a>.</li>
|
||||
<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags#answer-1732454">You can't parse [X]HTML with regex</a>.</li>
|
||||
<li>HTML Standard: <a href="https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state">Comment end bang state</a>.</li>
|
||||
<li>stackoverflow.com: <a href="https://stackoverflow.com/questions/25559999/why-arent-browsers-strict-about-html">Why aren't browsers strict about HTML?</a>.</li>
|
||||
</references>
|
||||
</qhelp>
|
||||
|
||||
|
||||
19
ruby/ql/src/queries/security/cwe-116/BadTagFilter.ql
Normal file
19
ruby/ql/src/queries/security/cwe-116/BadTagFilter.ql
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
|
||||
* @name Bad HTML filtering regexp
|
||||
* @description Matching HTML tags using regular expressions is hard to do right, and can easily lead to security issues.
|
||||
* @kind problem
|
||||
* @problem.severity warning
|
||||
* @security-severity 7.8
|
||||
* @precision high
|
||||
* @id rb/bad-tag-filter
|
||||
* @tags correctness
|
||||
* security
|
||||
* external/cwe/cwe-116
|
||||
* external/cwe/cwe-020
|
||||
*/
|
||||
|
||||
import codeql.ruby.security.BadTagFilterQuery
|
||||
|
||||
from HTMLMatchingRegExp regexp, string msg
|
||||
where msg = min(string m | isBadRegexpFilter(regexp, m) | m order by m.length(), m) // a regexp may have several messages; report only the shortest one, with ties broken by string order
|
||||
select regexp, msg
|
||||
@@ -0,0 +1,8 @@
|
||||
# Example sanitizer that the accompanying query help describes as incomplete.
# Repeatedly removes <script>...</script> spans from `html` until a pass
# leaves the string unchanged, then returns the resulting string.
def filter_script_tags(html)
|
||||
old_html = ""
|
||||
while (html != old_html)
|
||||
old_html = html
|
||||
# BAD: the end-tag part matches only the literal text "</script>", so an end
# tag such as </script foo="bar"> is not removed. There is no /i flag, so
# upper-case tag names are not matched either. /m lets `.` span newlines.
html = html.gsub(/<script[^>]*>.*<\/script>/m, "")
|
||||
end
|
||||
html
|
||||
end
|
||||
@@ -15,8 +15,8 @@
|
||||
|
||||
import DataFlow::PathGraph
|
||||
import codeql.ruby.DataFlow
|
||||
import codeql.ruby.regexp.PolynomialReDoSQuery
|
||||
import codeql.ruby.regexp.SuperlinearBackTracking
|
||||
import codeql.ruby.security.performance.PolynomialReDoSQuery
|
||||
import codeql.ruby.security.performance.SuperlinearBackTracking
|
||||
|
||||
from
|
||||
PolynomialReDoS::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink,
|
||||
|
||||
@@ -14,9 +14,9 @@
|
||||
* external/cwe/cwe-400
|
||||
*/
|
||||
|
||||
import codeql.ruby.regexp.ExponentialBackTracking
|
||||
import codeql.ruby.regexp.ReDoSUtil
|
||||
import codeql.ruby.regexp.RegExpTreeView
|
||||
import codeql.ruby.security.performance.ExponentialBackTracking
|
||||
import codeql.ruby.security.performance.ReDoSUtil
|
||||
import codeql.ruby.security.performance.RegExpTreeView
|
||||
|
||||
from RegExpTerm t, string pump, State s, string prefixMsg
|
||||
where hasReDoSResult(t, pump, s, prefixMsg)
|
||||
|
||||
Reference in New Issue
Block a user