diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 21ef902f2e5..f3026d8faad 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -680,6 +680,9 @@ module Escaping { /** Gets the escape-kind for escaping a string so it can safely be included in HTML. */ string getHtmlKind() { result = "html" } + /** Gets the escape-kind for escaping a string so it can safely be included in XML. */ + string getXmlKind() { result = "xml" } + /** Gets the escape-kind for escaping a string so it can safely be included in a regular expression. */ string getRegexKind() { result = "regex" } @@ -710,6 +713,15 @@ class HtmlEscaping extends Escaping { HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() } } +/** + * An escape of a string so it can be safely included in + * the body of an XML element, for example, replacing `&` and `<>` in + * `&xxe;`. + */ +class XmlEscaping extends Escaping { + XmlEscaping() { super.getKind() = Escaping::getXmlKind() } +} + /** * An escape of a string so it can be safely included in * the body of a regex. diff --git a/python/ql/lib/semmle/python/frameworks/MarkupSafe.qll b/python/ql/lib/semmle/python/frameworks/MarkupSafe.qll index e77b92a40dc..99cfd75bec6 100644 --- a/python/ql/lib/semmle/python/frameworks/MarkupSafe.qll +++ b/python/ql/lib/semmle/python/frameworks/MarkupSafe.qll @@ -83,7 +83,7 @@ private module MarkupSafeModel { } /** Taint propagation for `markupsafe.Markup`. */ - private class AddtionalTaintStep extends TaintTracking::AdditionalTaintStep { + private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { nodeTo.(ClassInstantiation).getArg(0) = nodeFrom } @@ -92,11 +92,7 @@ private module MarkupSafeModel { /** Any escaping performed via the `markupsafe` package. */ abstract private class MarkupSafeEscape extends Escaping::Range { - override string getKind() { - // TODO: this package claims to escape for both HTML and XML, but for now we don't - // model XML. - result = Escaping::getHtmlKind() - } + override string getKind() { result in [Escaping::getHtmlKind(), Escaping::getXmlKind()] } } /** A call to any of the escaping functions in `markupsafe` */ diff --git a/python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll index 525bceeb04a..c98e8153be9 100644 --- a/python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll @@ -44,4 +44,11 @@ module Xxe { ) } } + + /** + * An XML escaping, considered as a sanitizer. + */ + class XmlEscapingAsSanitizer extends Sanitizer { + XmlEscapingAsSanitizer() { this = any(XmlEscaping esc).getOutput() } + } } diff --git a/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected index c18b417db90..53dc9f018cb 100644 --- a/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected +++ b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected @@ -1,25 +1,25 @@ edges | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:1:26:1:32 | GSSA Variable request | -| test.py:1:26:1:32 | GSSA Variable request | test.py:8:19:8:25 | ControlFlowNode for request | -| test.py:1:26:1:32 | GSSA Variable request | test.py:19:19:19:25 | ControlFlowNode for request | -| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute | -| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript | -| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content | -| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute | -| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | -| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | +| test.py:1:26:1:32 | GSSA Variable request | test.py:9:19:9:25 | ControlFlowNode for request | +| test.py:1:26:1:32 | GSSA Variable request | test.py:20:19:20:25 | ControlFlowNode for request | +| test.py:9:19:9:25 | ControlFlowNode for request | test.py:9:19:9:30 | ControlFlowNode for Attribute | +| test.py:9:19:9:30 | ControlFlowNode for Attribute | test.py:9:19:9:45 | ControlFlowNode for Subscript | +| test.py:9:19:9:45 | ControlFlowNode for Subscript | test.py:10:34:10:44 | ControlFlowNode for xml_content | +| test.py:20:19:20:25 | ControlFlowNode for request | test.py:20:19:20:30 | ControlFlowNode for Attribute | +| test.py:20:19:20:30 | ControlFlowNode for Attribute | test.py:20:19:20:45 | ControlFlowNode for Subscript | +| test.py:20:19:20:45 | ControlFlowNode for Subscript | test.py:31:34:31:44 | ControlFlowNode for xml_content | nodes | test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember | | test.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request | -| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:9:19:9:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:9:19:9:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:9:19:9:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:10:34:10:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:20:19:20:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:20:19:20:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:20:19:20:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:31:34:31:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:9:34:9:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | -| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:30:34:30:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | +| test.py:10:34:10:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:10:34:10:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | +| test.py:31:34:31:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:31:34:31:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against external entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-611-Xxe/test.py b/python/ql/test/query-tests/Security/CWE-611-Xxe/test.py index d9181c4cf34..104f2663d59 100644 --- a/python/ql/test/query-tests/Security/CWE-611-Xxe/test.py +++ b/python/ql/test/query-tests/Security/CWE-611-Xxe/test.py @@ -1,5 +1,6 @@ from flask import Flask, request import lxml.etree +import markupsafe app = Flask(__name__) @@ -28,3 +29,9 @@ def super_vuln_handler(): huge_tree=True, ) return lxml.etree.fromstring(xml_content, parser=parser).text + +@app.route("/sanitized-handler") +def sanitized_handler(): + xml_content = request.args['xml_content'] + xml_content = markupsafe.escape(xml_content) + return lxml.etree.fromstring(xml_content).text \ No newline at end of file