Merge pull request #15398 from RasmusWL/html-escape

Python: Add `html.escape` as HTML sanitizer
This commit is contained in:
Rasmus Wriedt Larsen
2024-01-30 16:06:01 +01:00
committed by GitHub
3 changed files with 42 additions and 0 deletions

View File

@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Added `html.escape` as a sanitizer for HTML.

View File

@@ -4830,6 +4830,35 @@ module StdlibPrivate {
override predicate isShellInterpreted(DataFlow::Node arg) { arg = this.getCommand() }
}
}
// ---------------------------------------------------------------------------
// html
// ---------------------------------------------------------------------------
/**
 * A call to `html.escape` from the Python standard library, modeled as an
 * HTML escaping operation.
 *
 * Calls where a falsy literal reaches the `quote` parameter are excluded.
 *
 * See https://docs.python.org/3/library/html.html#html.escape
 */
private class HtmlEscapeCall extends Escaping::Range, API::CallNode {
  HtmlEscapeCall() {
    this = API::moduleImport("html").getMember("escape").getACall() and
    // With quote escaping turned off, the result is unsafe when placed inside an
    // attribute value of a tag, e.g. `<foo bar="escape_result">`, and can lead to
    // XSS. We cannot tell where the result ends up, and we would rather keep such
    // results than miss them (FNs), so only calls with quote escaping enabled are
    // treated as escapes. This may cause some FPs and might need revisiting.
    not exists(ImmutableLiteral quoteValue |
      quoteValue = this.getParameter(1, "quote").getAValueReachingSink().asExpr() and
      quoteValue.booleanValue() = false
    )
  }

  /** Gets the string being escaped: the first positional argument, or keyword `s`. */
  override DataFlow::Node getAnInput() { result = this.getParameter(0, "s").asSink() }

  /** Gets the escaped result, which is the call node itself. */
  override DataFlow::Node getOutput() { result = this }

  /** Gets the kind of escaping performed, which is HTML. */
  override string getKind() { result = Escaping::getHtmlKind() }
}
}
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,9 @@
import html
# Fixture for the `html.escape` escaping model. The trailing annotations on a call
# line record the expected escape input, kind and output for that call; a call
# line without an annotation is expected NOT to be reported as an escape.
# NOTE(review): presumably consumed by an inline-expectations test harness; confirm.
s = "tainted"
# Default `quote` value: modeled as an HTML escape.
html.escape(s) # $ escapeInput=s escapeKind=html escapeOutput=html.escape(..)
# `quote` explicitly enabled (positional): modeled as an HTML escape.
html.escape(s, True) # $ escapeInput=s escapeKind=html escapeOutput=html.escape(..)
# not considered html escapes, since they don't escape all relevant characters
# (`quote` is disabled below, once positionally and once by keyword)
html.escape(s, False)
html.escape(s, quote=False)