diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.py b/python/ql/src/experimental/Security/CWE-611/XXE.py
deleted file mode 100644
index 7b42c899098..00000000000
--- a/python/ql/src/experimental/Security/CWE-611/XXE.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from flask import request, Flask
-import lxml.etree
-import xml.etree.ElementTree
-
-
-@app.route("/example")
-def example():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser()
- parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
-
- return parsed_xml.text
diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.ql b/python/ql/src/experimental/Security/CWE-611/XXE.ql
deleted file mode 100644
index 78866def1f5..00000000000
--- a/python/ql/src/experimental/Security/CWE-611/XXE.ql
+++ /dev/null
@@ -1,22 +0,0 @@
-/**
- * @name XML External Entity abuse
- * @description User input should not be parsed by XML parsers without security options enabled.
- * @kind path-problem
- * @problem.severity error
- * @id py/xxe
- * @tags security
- * external/cwe/cwe-611
- * external/cwe/cwe-776
- * external/cwe/cwe-827
- */
-
-// determine precision above
-import python
-import experimental.semmle.python.security.XXE
-import DataFlow::PathGraph
-
-from XXEFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
-where config.hasFlowPath(source, sink)
-select sink.getNode(), source, sink,
- "$@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse",
- sink.getNode(), "This", source.getNode(), "user-provided value"
diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.xml b/python/ql/src/experimental/Security/CWE-611/XXE.xml
new file mode 100644
index 00000000000..ddd196f2f13
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-611/XXE.xml
@@ -0,0 +1,4 @@
+
+]>
+&xxe;
\ No newline at end of file
diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.py b/python/ql/src/experimental/Security/CWE-611/XmlInjection.py
new file mode 100644
index 00000000000..0e9eec933d7
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-611/XmlInjection.py
@@ -0,0 +1,25 @@
+from flask import request, Flask
+import lxml.etree
+import xml.etree.ElementTree
+
+app = Flask(__name__)
+
+# BAD: the lxml parser keeps its default entity resolution, so untrusted XML is parsed unsafely.
+@app.route("/bad")
+def bad():
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser()
+ parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
+
+ return parsed_xml.text
+
+# GOOD: entity resolution is disabled on the parser (resolve_entities=False).
+@app.route("/good")
+def good():
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser(resolve_entities=False)
+ parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
+
+ return parsed_xml.text
\ No newline at end of file
diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.qhelp b/python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp
similarity index 71%
rename from python/ql/src/experimental/Security/CWE-611/XXE.qhelp
rename to python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp
index bc00aa2f756..e617835bdef 100644
--- a/python/ql/src/experimental/Security/CWE-611/XXE.qhelp
+++ b/python/ql/src/experimental/Security/CWE-611/XmlInjection.qhelp
@@ -5,31 +5,18 @@
-Parsing untrusted XML files with a weakly configured XML parser may lead to an XML External Entity (XXE) attack.
+Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE),
+Billion Laughs, Quadratic Blowup and DTD retrieval.
This type of attack uses external entity references to access arbitrary files on a system, carry out denial of
service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band
data retrieval techniques may allow attackers to steal sensitive data. Denial of services can also be carried out
in this situation.
-
-Refer to the following links to check the details regarding how and which libraries are vulnerable:
-
-
-
-
-
-This query currently identifies vulnerable XML parsing from the following parsers:
-xml.etree.ElementTree.XMLParser, lxml.etree.XMLParser, lxml.etree.get_default_parser,
-xml.sax.make_parser.
-
-Use defusedxml, a Python package aimed
+Use defusedxml, a Python package aimed
to prevent any potentially malicious operation.
@@ -39,10 +26,17 @@ to prevent any potentially malicious operation.
The following example calls xml.etree.ElementTree.fromstring using a parser (lxml.etree.XMLParser)
that is not safely configured on untrusted data, and is therefore inherently unsafe.
-
+
+
+Providing an input (xml_content) like the following XML content against /bad, the request response would contain the contents of
+/etc/passwd.
+
+
+Python 3 XML Vulnerabilities.
+Python 2 XML Vulnerabilities.
Python XML Parsing.
OWASP vulnerability description: XML External Entity (XXE) Processing.
OWASP guidance on parsing xml files: XXE Prevention Cheat Sheet.
diff --git a/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql
new file mode 100644
index 00000000000..78213f624ea
--- /dev/null
+++ b/python/ql/src/experimental/Security/CWE-611/XmlInjection.ql
@@ -0,0 +1,22 @@
+/**
+ * @name XML injection
+ * @description User input should not be parsed without security options enabled.
+ * @kind path-problem
+ * @problem.severity error
+ * @id py/xml-injection
+ * @tags security
+ * external/cwe/cwe-611
+ * external/cwe/cwe-776
+ * external/cwe/cwe-827
+ */
+
+// TODO: determine the `@precision` value and add it to the query metadata above.
+import python
+import experimental.semmle.python.security.dataflow.XmlInjection
+import DataFlow::PathGraph
+
+from DataFlow::PathNode source, DataFlow::PathNode sink, string kind
+where XmlInjection::xmlInjectionVulnerable(source, sink, kind)
+select sink.getNode(), source, sink,
+ "$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(),
+ "This", source.getNode(), "user-provided value"
diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll
index c6b6ed6a0d5..e2dbf0547d8 100644
--- a/python/ql/src/experimental/semmle/python/Concepts.qll
+++ b/python/ql/src/experimental/semmle/python/Concepts.qll
@@ -44,94 +44,84 @@ class LogOutput extends DataFlow::Node {
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
-/** Provides classes for modeling XML parsing APIs. */
-module XMLParsing {
+module XML {
/**
* A data-flow node that collects functions parsing XML.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
*/
- abstract class Range extends DataFlow::Node {
+ class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
/**
* Gets the argument containing the content to parse.
*/
- abstract DataFlow::Node getAnInput();
+ DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
- * Holds if the parser may be parsing the input dangerously.
- *
- * Specifically, this predicate holds whether the XML parsing parses/extends external
- * entities in the parsed XML stream.
+ * Holds if the parsing method or the parser holding it is vulnerable to `kind`.
*/
- abstract predicate mayBeDangerous();
+ predicate vulnerable(string kind) { super.vulnerable(kind) }
}
-}
-/**
- * A data-flow node that collects functions parsing XML.
- *
- * Extend this class to model new APIs. If you want to refine existing API models,
- * extend `XMLParsing` instead.
- */
-class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
- /**
- * Gets the argument containing the content to parse.
- *
- * Specifically, this predicate holds whether the XML parsing parses/extends external
- * entities in the parsed XML stream.
- */
- DataFlow::Node getAnInput() { result = super.getAnInput() }
+ /** Provides classes for modeling XML parsing APIs. */
+ module XMLParsing {
+ /**
+ * A data-flow node that collects functions parsing XML.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `XMLParsing` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /**
+ * Gets the argument containing the content to parse.
+ */
+ abstract DataFlow::Node getAnInput();
- /**
- * Holds if the parser may be parsing the input dangerously.
- */
- predicate mayBeDangerous() { super.mayBeDangerous() }
-}
+ /**
+ * Holds if the parsing method or the parser holding it is vulnerable to `kind`.
+ */
+ abstract predicate vulnerable(string kind);
+ }
+ }
-/** Provides classes for modeling XML parsers. */
-module XMLParser {
/**
* A data-flow node that collects XML parsers.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParser` instead.
*/
- abstract class Range extends DataFlow::Node {
+ class XMLParser extends DataFlow::Node instanceof XMLParser::Range {
/**
* Gets the argument containing the content to parse.
*/
- abstract DataFlow::Node getAnInput();
+ DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
- * Holds if the parser may be dangerously configured.
- *
- * Specifically, this predicate holds whether the XML parser parses/extends external
- * entities in the parsed XML stream.
+ * Holds if the parser is vulnerable to `kind`.
*/
- abstract predicate mayBeDangerous();
+ predicate vulnerable(string kind) { super.vulnerable(kind) }
}
-}
-/**
- * A data-flow node that collects XML parsers.
- *
- * Extend this class to model new APIs. If you want to refine existing API models,
- * extend `XMLParser` instead.
- */
-class XMLParser extends DataFlow::Node instanceof XMLParser::Range {
- /**
- * Gets the argument containing the content to parse.
- */
- DataFlow::Node getAnInput() { result = super.getAnInput() }
+ /** Provides classes for modeling XML parsers. */
+ module XMLParser {
+ /**
+ * A data-flow node that collects XML parsers.
+ *
+ * Extend this class to model new APIs. If you want to refine existing API models,
+ * extend `XMLParser` instead.
+ */
+ abstract class Range extends DataFlow::Node {
+ /**
+ * Gets the argument containing the content to parse.
+ */
+ abstract DataFlow::Node getAnInput();
- /**
- * Holds if the parser may be dangerously configured.
- *
- * Specifically, this predicate holds whether the XML parser parses/extends external
- * entities in the parsed XML stream.
- */
- predicate mayBeDangerous() { super.mayBeDangerous() }
+ /**
+ * Holds if the parser is vulnerable to `kind`.
+ */
+ abstract predicate vulnerable(string kind);
+ }
+ }
}
/** Provides classes for modeling LDAP query execution-related APIs. */
diff --git a/python/ql/src/experimental/semmle/python/Frameworks.qll b/python/ql/src/experimental/semmle/python/Frameworks.qll
index a32d1b0d0de..37620db889a 100644
--- a/python/ql/src/experimental/semmle/python/Frameworks.qll
+++ b/python/ql/src/experimental/semmle/python/Frameworks.qll
@@ -3,7 +3,7 @@
*/
private import experimental.semmle.python.frameworks.Stdlib
-private import experimental.semmle.python.frameworks.XML
+private import experimental.semmle.python.frameworks.Xml
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug
diff --git a/python/ql/src/experimental/semmle/python/frameworks/XML.qll b/python/ql/src/experimental/semmle/python/frameworks/XML.qll
deleted file mode 100644
index 8fce0b0172c..00000000000
--- a/python/ql/src/experimental/semmle/python/frameworks/XML.qll
+++ /dev/null
@@ -1,196 +0,0 @@
-/**
- * Provides class and predicates to track external data that
- * may represent malicious XML objects.
- */
-
-private import python
-private import semmle.python.dataflow.new.DataFlow
-private import experimental.semmle.python.Concepts
-private import semmle.python.ApiGraphs
-
-private module XML {
- /** Gets a reference to `xml.etree.ElementTree`. */
- private API::Node xmlEtree() {
- result = API::moduleImport("xml").getMember("etree").getMember("ElementTree")
- }
-
- /** Gets a call to `xml.etree.ElementTree.XMLParser`. */
- private class XMLEtreeParser extends DataFlow::CallCfgNode, XMLParser::Range {
- XMLEtreeParser() { this = xmlEtree().getMember("XMLParser").getACall() }
-
- override DataFlow::Node getAnInput() { none() }
-
- override predicate mayBeDangerous() { any() }
- }
-
- /**
- * Gets a call to `xml.etree.ElementTree.fromstring`, `xml.etree.ElementTree.fromstringlist`,
- * `xml.etree.ElementTree.XML` or `xml.etree.ElementTree.parse`.
- *
- * Given the following example:
- *
- * ```py
- * parser = lxml.etree.XMLParser()
- * parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
- * ```
- *
- * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)`
- * and `xml_content` would be the result of `getAnInput()`.
- */
- private class XMLEtreeParsing extends DataFlow::CallCfgNode, XMLParsing::Range {
- XMLEtreeParsing() {
- this = xmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall()
- }
-
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
-
- override predicate mayBeDangerous() {
- exists(XMLParser xmlParser |
- xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser
- )
- }
- }
-
- /** Gets a reference to `xml.sax`. */
- private API::Node xmlSax() { result = API::moduleImport("xml").getMember("sax") }
-
- /**
- * Gets a call to `xml.sax.make_parser` and following calls.
- *
- * Given the following example:
- *
- * ```py
- * BadHandler = MainHandler()
- * parser = xml.sax.make_parser()
- * parser.setContentHandler(BadHandler)
- * parser.setFeature(xml.sax.handler.feature_external_ges, False)
- * parser.parse(StringIO(xml_content))
- * parsed_xml = BadHandler._result
- * ```
- *
- * `this` would be `xml.sax.make_parser()`, `getAnInput()` would return `StringIO(xml_content)`
- * and `mayBeDangerous()` would not hold since `xml.sax.handler.feature_external_ges` is set to
- * `False` and so is not vulnerable.
- * see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
- */
- private class XMLSaxParser extends DataFlow::CallCfgNode, XMLParser::Range {
- DataFlow::CallCfgNode attrCall;
-
- XMLSaxParser() {
- this = xmlSax().getMember("make_parser").getACall() and
- attrCall.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = this
- }
-
- override DataFlow::Node getAnInput() {
- attrCall.getFunction().(DataFlow::AttrRead).getAttributeName() = "parse" and
- result = attrCall.getArg(0)
- }
-
- override predicate mayBeDangerous() {
- attrCall.getFunction().(DataFlow::AttrRead).getAttributeName() = "setFeature" and
- attrCall.getArg(0) = xmlSax().getMember("handler").getMember("feature_external_ges").getAUse() and
- DataFlow::localFlow(DataFlow::exprNode(any(True trueName)), attrCall.getArg(1))
- }
- }
-
- /** Gets a reference to `lxml.etree`. */
- private API::Node lxmlEtree() { result = API::moduleImport("lxml").getMember("etree") }
-
- /**
- * Gets a call to `lxml.etree.XMLParser` or `lxml.etree.get_default_parser` and `mayBeDangerous()`
- * identifies whether the argument `no_network` is set to `False` or the arguments `huge_tree`
- * or `resolve_entities` are set to True. Since `resolve_entities` default value is `True`,
- * the predicate will also succeed if the argument is not set.
- */
- private class LXMLParser extends DataFlow::CallCfgNode, XMLParser::Range {
- LXMLParser() { this = lxmlEtree().getMember(["XMLParser", "get_default_parser"]).getACall() }
-
- override DataFlow::Node getAnInput() { none() }
-
- override predicate mayBeDangerous() {
- DataFlow::localFlow(DataFlow::exprNode(any(False falseName)), this.getArgByName("no_network")) or
- DataFlow::localFlow(DataFlow::exprNode(any(True trueName)),
- this.getArgByName(["huge_tree", "resolve_entities"])) or
- not exists(this.getArgByName("resolve_entities"))
- }
- }
-
- /**
- * Gets a call to `lxml.etree.fromstring`, `xml.etree.fromstringlist`,
- * `xml.etree.XML` or `xml.etree.parse`.
- *
- * Given the following example:
- *
- * ```py
- * parser = lxml.etree.XMLParser()
- * parsed_xml = lxml.etree.fromstring(xml_content, parser=parser).text
- * ```
- *
- * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)`
- * and `xml_content` would be the result of `getAnInput()`.
- */
- private class LXMLParsing extends DataFlow::CallCfgNode, XMLParsing::Range {
- LXMLParsing() {
- this = lxmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall()
- }
-
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
-
- override predicate mayBeDangerous() {
- exists(XMLParser xmlParser |
- xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser
- )
- or
- not exists(this.getArgByName("parser"))
- }
- }
-
- /** Gets a reference to the `xmltodict` module. */
- private API::Node xmltodict() { result = API::moduleImport("xmltodict") }
-
- /**
- * Gets a call to `xmltodict.parse` and `mayBeDangerous()` identifies
- * whether the argument `disable_entities` is set to `False`.
- */
- private class XMLtoDictParsing extends DataFlow::CallCfgNode, XMLParsing::Range {
- XMLtoDictParsing() { this = xmltodict().getMember("parse").getACall() }
-
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
-
- override predicate mayBeDangerous() {
- DataFlow::localFlow(DataFlow::exprNode(any(False falseName)),
- this.getArgByName("disable_entities"))
- }
- }
-
- /** Gets a reference to `xml.dom.minidom` or `xml.dom.pulldom`. */
- private API::Node xmlDom() {
- result = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"])
- }
-
- /**
- * Gets a call to `xml.dom.minidom.parse` or `xml.dom.pulldom.parse`.
- *
- * Given the following example:
- *
- * ```py
- * parser = xml.sax.make_parser()
- * parser.setFeature(xml.sax.handler.feature_external_ges, True)
- * parsed_xml = xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNod
- * ```
- *
- * `this` would be `xml.dom.minidom.parse(StringIO(xml_content), parser=parser)`
- * and `StringIO(xml_content)` would be the result of `getAnInput()`.
- */
- private class XMLDomParsing extends DataFlow::CallCfgNode, XMLParsing::Range {
- XMLDomParsing() { this = xmlDom().getMember("parse").getACall() }
-
- override DataFlow::Node getAnInput() { result = this.getArg(0) }
-
- override predicate mayBeDangerous() {
- exists(XMLParser xmlParser |
- xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser
- )
- }
- }
-}
diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
new file mode 100644
index 00000000000..d2d1927e953
--- /dev/null
+++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll
@@ -0,0 +1,301 @@
+/**
+ * Provides class and predicates to track external data that
+ * may represent malicious XML objects.
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+private import experimental.semmle.python.Concepts
+private import semmle.python.ApiGraphs
+
+private module Xml {
+ /**
+ * A call to `xml.etree.ElementTree.XMLParser`, modeled with no input and no vulnerability kind.
+ */
+ private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
+ XMLEtreeParser() {
+ this =
+ API::moduleImport("xml")
+ .getMember("etree")
+ .getMember("ElementTree")
+ .getMember("XMLParser")
+ .getACall()
+ }
+
+ override DataFlow::Node getAnInput() { none() }
+
+ override predicate vulnerable(string kind) { none() }
+ }
+
+ /**
+ * A call to one of:
+ * * `xml.etree.ElementTree.fromstring`
+ * * `xml.etree.ElementTree.fromstringlist`
+ * * `xml.etree.ElementTree.XML`
+ * * `xml.etree.ElementTree.parse`
+ *
+ * Given the following example:
+ *
+ * ```py
+ * parser = lxml.etree.XMLParser()
+ * xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
+ * ```
+ *
+ * * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)`.
+ * * `getAnInput()`'s result would be `xml_content`.
+ * * `vulnerable(kind)`'s `kind` would be `XXE`, taken from the parser passed as `parser=`.
+ */
+ private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
+ XMLEtreeParsing() {
+ this =
+ API::moduleImport("xml")
+ .getMember("etree")
+ .getMember("ElementTree")
+ .getMember(["fromstring", "fromstringlist", "XML", "parse"])
+ .getACall()
+ }
+
+ override DataFlow::Node getAnInput() { result = this.getArg(0) }
+
+ override predicate vulnerable(string kind) {
+ exists(XML::XMLParser xmlParser |
+ xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
+ )
+ }
+ }
+
+ /** Gets a reference, tracked by `t`, to `parser` on which `setFeature(feature, True)` was called. */
+ private DataFlow::Node trackSaxFeature(
+ DataFlow::TypeTracker t, DataFlow::CallCfgNode parser, API::Node feature
+ ) {
+ t.start() and
+ exists(DataFlow::MethodCallNode featureCall |
+ featureCall = parser.getAMethodCall("setFeature") and
+ featureCall.getArg(0).getALocalSource() = feature.getAUse() and
+ featureCall.getArg(1).getALocalSource() = DataFlow::exprNode(any(True t_)) and
+ result = featureCall.getObject()
+ )
+ or
+ exists(DataFlow::TypeTracker t2 |
+ t = t2.smallstep(trackSaxFeature(t2, parser, feature), result)
+ )
+ }
+
+ /** Gets a reference to `parser` on which `setFeature(feature, True)` has been called. */
+ DataFlow::Node trackSaxFeature(DataFlow::CallCfgNode parser, API::Node feature) {
+ result = trackSaxFeature(DataFlow::TypeTracker::end(), parser, feature)
+ }
+
+ /**
+ * A call to `xml.sax.make_parser`.
+ *
+ * Given the following example:
+ *
+ * ```py
+ * BadHandler = MainHandler()
+ * parser = xml.sax.make_parser()
+ * parser.setContentHandler(BadHandler)
+ * parser.setFeature(xml.sax.handler.feature_external_ges, False)
+ * parser.parse(StringIO(xml_content))
+ * parsed_xml = BadHandler._result
+ * ```
+ *
+ * * `this` would be `xml.sax.make_parser()`.
+ * * `getAnInput()`'s result would be `StringIO(xml_content)`.
+ * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup` (not `XXE`: the feature is `False`).
+ */
+ private class XMLSaxParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
+ XMLSaxParser() {
+ this = API::moduleImport("xml").getMember("sax").getMember("make_parser").getACall()
+ }
+
+ override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) }
+
+ override predicate vulnerable(string kind) {
+ // Entity expansion does not depend on any feature, so these kinds always hold.
+ kind = ["Billion Laughs", "Quadratic Blowup"]
+ or
+ // External entities are only processed once `feature_external_ges` has been set to `True`.
+ kind = ["XXE", "DTD retrieval"] and
+ exists(DataFlow::MethodCallNode parse, API::Node feature |
+ feature = API::moduleImport("xml").getMember("sax").getMember("handler").getMember("feature_external_ges") and
+ parse.calls(trackSaxFeature(this, feature), "parse") and
+ parse.getArg(0) = this.getAnInput() // enough to avoid FPs?
+ )
+ }
+
+ predicate vulnerable(DataFlow::Node n, string kind) {
+ exists(API::Node handler, API::Node feature |
+ handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
+ DataFlow::exprNode(trackSaxFeature(this, feature).asExpr())
+ .(DataFlow::LocalSourceNode)
+ .flowsTo(n)
+ |
+ kind = ["XXE", "DTD retrieval"] and
+ feature = handler.getMember("feature_external_ges")
+ )
+ }
+ }
+
+ /**
+ * A call to one of:
+ * * `lxml.etree.XMLParser`
+ * * `lxml.etree.get_default_parser`
+ *
+ * Given the following example:
+ *
+ * ```py
+ * lxml.etree.XMLParser()
+ * ```
+ *
+ * * `this` would be `lxml.etree.XMLParser()`.
+ * * `vulnerable(kind)`'s `kind` would be `XXE`, since `resolve_entities` is not set to `False`.
+ */
+ private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
+ LXMLParser() {
+ this =
+ API::moduleImport("lxml")
+ .getMember("etree")
+ .getMember(["XMLParser", "get_default_parser"])
+ .getACall()
+ }
+
+ override DataFlow::Node getAnInput() { none() }
+
+ override predicate vulnerable(string kind) {
+ kind = "XXE" and not this.getArgByName("resolve_entities").asExpr() = any(False f)
+ or
+ kind = ["Billion Laughs", "Quadratic Blowup"] and
+ (
+ this.getArgByName("huge_tree").asExpr() = any(True t) and
+ not this.getArgByName("resolve_entities").asExpr() = any(False f)
+ )
+ }
+ }
+
+ /**
+ * A call to one of:
+ * * `lxml.etree.fromstring`
+ * * `lxml.etree.fromstringlist`
+ * * `lxml.etree.XML`
+ * * `lxml.etree.parse`
+ *
+ * Given the following example:
+ *
+ * ```py
+ * parser = lxml.etree.XMLParser()
+ * lxml.etree.fromstring(xml_content, parser=parser).text
+ * ```
+ *
+ * * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)`.
+ * * `getAnInput()`'s result would be `xml_content`.
+ * * `vulnerable(kind)`'s `kind` would be `XXE`.
+ */
+ private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
+ LXMLParsing() {
+ this =
+ API::moduleImport("lxml")
+ .getMember("etree")
+ .getMember(["fromstring", "fromstringlist", "XML", "parse"])
+ .getACall()
+ }
+
+ override DataFlow::Node getAnInput() { result = this.getArg(0) }
+
+ override predicate vulnerable(string kind) {
+ exists(XML::XMLParser xmlParser |
+ xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
+ )
+ or
+ kind = "XXE" and not exists(this.getArgByName("parser"))
+ }
+ }
+
+ /**
+ * A call to `xmltodict.parse`.
+ *
+ * Given the following example:
+ *
+ * ```py
+ * xmltodict.parse(xml_content, disable_entities=False)
+ * ```
+ *
+ * * `this` would be `xmltodict.parse(xml_content, disable_entities=False)`.
+ * * `getAnInput()`'s result would be `xml_content`.
+ * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`, as `disable_entities` is `False`.
+ */
+ private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
+ XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }
+
+ override DataFlow::Node getAnInput() { result = this.getArg(0) }
+
+ override predicate vulnerable(string kind) {
+ kind = ["Billion Laughs", "Quadratic Blowup"] and
+ this.getArgByName("disable_entities").asExpr() = any(False f)
+ }
+ }
+
+ /**
+ * A call to `parse` or `parseString` from one of:
+ * * `xml.dom.minidom`
+ * * `xml.dom.pulldom`
+ *
+ * Given the following example:
+ *
+ * ```py
+ * xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNode
+ * ```
+ *
+ * * `this` would be `xml.dom.minidom.parse(StringIO(xml_content))`.
+ * * `getAnInput()`'s result would be `StringIO(xml_content)`.
+ * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
+ */
+ private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
+ XMLDomParsing() {
+ this =
+ API::moduleImport("xml")
+ .getMember("dom")
+ .getMember(["minidom", "pulldom"])
+ .getMember(["parse", "parseString"])
+ .getACall()
+ }
+
+ override DataFlow::Node getAnInput() { result = this.getArg(0) }
+
+ override predicate vulnerable(string kind) {
+ exists(XML::XMLParser xmlParser |
+ xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
+ )
+ or
+ kind = ["Billion Laughs", "Quadratic Blowup"] and not exists(this.getArgByName("parser"))
+ }
+ }
+
+ /**
+ * A call to `xmlrpc.server.SimpleXMLRPCServer`, modeled as an XML parser.
+ *
+ * Given the following example:
+ *
+ * ```py
+ * server = SimpleXMLRPCServer(("127.0.0.1", 8000))
+ * server.register_function(foo, "foo")
+ * server.serve_forever()
+ * ```
+ *
+ * * `this` would be `SimpleXMLRPCServer(("127.0.0.1", 8000))`.
+ * * `getAnInput()`'s result would be `foo`.
+ * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
+ */
+ private class XMLRPCServer extends DataFlow::CallCfgNode, XML::XMLParser::Range {
+ XMLRPCServer() {
+ this =
+ API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall()
+ }
+
+ override DataFlow::Node getAnInput() {
+ result = this.getAMethodCall("register_function").getArg(0)
+ }
+
+ override predicate vulnerable(string kind) { kind = ["Billion Laughs", "Quadratic Blowup"] }
+ }
+}
diff --git a/python/ql/src/experimental/semmle/python/security/XXE.qll b/python/ql/src/experimental/semmle/python/security/XXE.qll
deleted file mode 100644
index 7998d4081db..00000000000
--- a/python/ql/src/experimental/semmle/python/security/XXE.qll
+++ /dev/null
@@ -1,37 +0,0 @@
-import python
-import experimental.semmle.python.Concepts
-import semmle.python.dataflow.new.DataFlow
-import semmle.python.dataflow.new.TaintTracking
-import semmle.python.dataflow.new.RemoteFlowSources
-import semmle.python.dataflow.new.BarrierGuards
-import semmle.python.ApiGraphs
-
-/**
- * A taint-tracking configuration for detecting XML External entities abuse.
- *
- * This configuration uses `RemoteFlowSource` as a source because there's no
- * risk at parsing not user-supplied input without security options enabled.
- */
-class XXEFlowConfig extends TaintTracking::Configuration {
- XXEFlowConfig() { this = "XXEFlowConfig" }
-
- override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
-
- override predicate isSink(DataFlow::Node sink) {
- exists(XMLParsing xmlParsing | xmlParsing.mayBeDangerous() and sink = xmlParsing.getAnInput())
- or
- exists(XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.mayBeDangerous())
- }
-
- override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
- guard instanceof StringConstCompare
- }
-
- override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
- exists(DataFlow::CallCfgNode ioCalls |
- ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
- nodeFrom = ioCalls.getArg(0) and
- nodeTo = ioCalls
- )
- }
-}
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll
new file mode 100644
index 00000000000..90e2c9bf342
--- /dev/null
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjection.qll
@@ -0,0 +1,44 @@
+import python
+import experimental.semmle.python.Concepts
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.TaintTracking
+import semmle.python.dataflow.new.RemoteFlowSources
+import semmle.python.dataflow.new.BarrierGuards
+
+module XmlInjection {
+ import XmlInjectionCustomizations::XmlInjection
+
+ class XMLInjectionConfiguration extends TaintTracking::Configuration {
+ XMLInjectionConfiguration() { this = "XMLInjectionConfiguration" }
+
+ override predicate isSource(DataFlow::Node source) {
+ source instanceof RemoteFlowSourceAsSource
+ }
+
+ override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
+
+ override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
+ guard instanceof SanitizerGuard
+ }
+
+ override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ ioAdditionalTaintStep(nodeFrom, nodeTo)
+ }
+ }
+
+ private import DataFlow::PathGraph
+
+ /** Holds if `XMLInjectionConfiguration` finds a flow path from `source` to `sink`. */
+ predicate xmlInjection(DataFlow::PathNode source, DataFlow::PathNode sink) {
+ any(XMLInjectionConfiguration xmlInjectionConfig).hasFlowPath(source, sink)
+ }
+
+ /** Holds if there is an XML injection from `source` to `sink` and `sink` is vulnerable to `kind`. */
+ predicate xmlInjectionVulnerable(DataFlow::PathNode source, DataFlow::PathNode sink, string kind) {
+ xmlInjection(source, sink) and
+ (
+ xmlParsingInputAsVulnerableSink(sink.getNode(), kind) or
+ xmlParserInputAsVulnerableSink(sink.getNode(), kind)
+ )
+ }
+}
diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll
new file mode 100644
index 00000000000..3e9dd22c69c
--- /dev/null
+++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlInjectionCustomizations.qll
@@ -0,0 +1,95 @@
+/**
+ * Provides default sources, sinks and sanitizers for detecting
+ * "xml injection"
+ * vulnerabilities, as well as extension points for adding your own.
+ */
+
+private import python
+private import semmle.python.dataflow.new.DataFlow
+private import experimental.semmle.python.Concepts
+private import semmle.python.dataflow.new.RemoteFlowSources
+private import semmle.python.dataflow.new.BarrierGuards
+private import semmle.python.ApiGraphs
+
+/**
+ * Provides default sources, sinks and sanitizers for detecting "xml injection"
+ * vulnerabilities, as well as extension points for adding your own.
+ */
+module XmlInjection {
+ /**
+ * A data-flow node that acts as a source for "xml injection" vulnerabilities.
+ */
+ abstract class Source extends DataFlow::Node { }
+
+ /**
+ * A data-flow node that acts as a sink for "xml injection" vulnerabilities.
+ */
+ abstract class Sink extends DataFlow::Node { }
+
+ /**
+ * A barrier guard that sanitizes data for "xml injection" vulnerabilities.
+ */
+ abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
+
+ /**
+ * A sink belonging to an XML parsing library call.
+ *
+ * See `XML::XMLParsing`.
+ */
+ abstract class XMLParsingSink extends Sink { }
+
+ /**
+ * A sink belonging to an XML parser.
+ *
+ * See `XML::XMLParser`.
+ */
+ abstract class XMLParserSink extends Sink { }
+
+ /**
+ * Remote user input, treated as a flow source.
+ */
+ class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
+
+ /**
+ * An input of an `XML::XMLParsing` operation, treated as a flow sink.
+ */
+ class XMLParsingInputAsSink extends XMLParsingSink {
+ XMLParsingInputAsSink() { exists(XML::XMLParsing parsing | this = parsing.getAnInput()) }
+ }
+
+ /**
+ * Holds if `sink` is an input of an XML parsing operation that is vulnerable to `kind`.
+ */
+ predicate xmlParsingInputAsVulnerableSink(DataFlow::Node sink, string kind) {
+ exists(XML::XMLParsing parsing |
+ parsing.vulnerable(kind) and sink = parsing.getAnInput()
+ )
+ }
+
+ /**
+ * An input of an `XML::XMLParser`, treated as a flow sink.
+ */
+ class XMLParserInputAsSink extends XMLParserSink {
+ XMLParserInputAsSink() { exists(XML::XMLParser parser | this = parser.getAnInput()) }
+ }
+
+ /**
+ * Holds if `sink` is an input of an XML parser that is vulnerable to `kind`.
+ */
+ predicate xmlParserInputAsVulnerableSink(DataFlow::Node sink, string kind) {
+ exists(XML::XMLParser parser | parser.vulnerable(kind) and sink = parser.getAnInput())
+ }
+
+ /**
+ * A comparison against a constant string, treated as a sanitizer guard.
+ */
+ class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
+
+ /**
+ * Holds if `nodeTo` is a call to `io.StringIO` or `io.BytesIO` whose first argument is `nodeFrom`.
+ */
+ predicate ioAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+ nodeTo = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
+ nodeFrom = nodeTo.(DataFlow::CallCfgNode).getArg(0)
+ }
+}
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref
deleted file mode 100644
index ada2b1e5202..00000000000
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.qlref
+++ /dev/null
@@ -1 +0,0 @@
-experimental/Security/CWE-611/XXE.ql
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected
similarity index 100%
rename from python/ql/test/experimental/query-tests/Security/CWE-611/XXE.expected
rename to python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.expected
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref
new file mode 100644
index 00000000000..24d483666ac
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlInjection.qlref
@@ -0,0 +1 @@
+experimental/Security/CWE-611/XmlInjection.ql
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py b/python/ql/test/experimental/query-tests/Security/CWE-611/general.py
deleted file mode 100644
index c9f8cc984bd..00000000000
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/general.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from flask import request, Flask
-from io import StringIO, BytesIO
-import xml.etree
-import xml.etree.ElementTree
-import lxml.etree
-import xml.dom.minidom
-import xml.dom.pulldom
-import xmltodict
-
-
-app = Flask(__name__)
-
-# xml_content = ']>&xxe;'
-
-
-@app.route("/lxml.etree.fromstring")
-def lxml_fromstring():
- xml_content = request.args['xml_content']
-
- return lxml.etree.fromstring(xml_content).text
-
-
-@app.route("/lxml.etree.XML")
-def lxml_XML():
- xml_content = request.args['xml_content']
-
- return lxml.etree.XML(xml_content).text
-
-
-@app.route("/lxml.etree.parse")
-def lxml_parse():
- xml_content = request.args['xml_content']
-
- return lxml.etree.parse(StringIO(xml_content)).text
-
-
-@app.route("/xmltodict.parse")
-def xmltodict_parse():
- xml_content = request.args['xml_content']
-
- return xmltodict.parse(xml_content, disable_entities=False)
-
-
-@app.route("/lxml.etree.XMLParser+lxml.etree.fromstring")
-def lxml_XMLParser_fromstring():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser()
- return lxml.etree.fromstring(xml_content, parser=parser).text
-
-
-@app.route("/lxml.etree.get_default_parser+lxml.etree.fromstring")
-def lxml_defaultParser_fromstring():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.get_default_parser()
- return lxml.etree.fromstring(xml_content, parser=parser).text
-
-
-@app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.fromstring")
-def lxml_XMLParser_xml_fromstring():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser()
- return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
-
-
-@app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.parse")
-def lxml_XMLParser_xml_parse():
- xml_content = request.args['xml_content']
-
- parser = lxml.etree.XMLParser()
- return xml.etree.ElementTree.parse(StringIO(xml_content), parser=parser).getroot().text
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
new file mode 100644
index 00000000000..2c3c6f5f2ff
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/lxml_etree.py
@@ -0,0 +1,76 @@
+from flask import request, Flask
+from io import StringIO, BytesIO
+import lxml.etree
+
+app = Flask(__name__)
+
+# Parsing
+
+@app.route("/lxml_etree_fromstring")
+def lxml_etree_fromstring():  # request parameter -> lxml.etree.fromstring, no explicit parser
+ xml_content = request.args['xml_content']
+
+ return lxml.etree.fromstring(xml_content).text
+
+@app.route("/lxml_etree_fromstringlist")
+def lxml_etree_fromstringlist():  # request parameter in a one-element list -> lxml.etree.fromstringlist
+ xml_content = request.args['xml_content']
+
+ return lxml.etree.fromstringlist([xml_content]).text
+
+@app.route("/lxml_etree_XML")
+def lxml_etree_XML():  # request parameter -> lxml.etree.XML, no explicit parser
+ xml_content = request.args['xml_content']
+
+ return lxml.etree.XML(xml_content).text
+
+@app.route("/lxml_etree_parse")
+def lxml_etree_parse():  # request parameter wrapped in StringIO -> lxml.etree.parse
+ xml_content = request.args['xml_content']
+
+ return lxml.etree.parse(StringIO(xml_content)).getroot().text
+
+# With parsers - Default (each view has a unique name and route so none shadows another)
+
+@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser")
+def lxml_xmlparser_default():  # XMLParser() built with no options
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser()
+ return lxml.etree.fromstring(xml_content, parser=parser).text
+
+@app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser")
+def lxml_get_default_parser():  # parser obtained from lxml.etree.get_default_parser()
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.get_default_parser()
+ return lxml.etree.fromstring(xml_content, parser=parser).text
+
+# With parsers - With options
+
+# XXE-safe
+@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+resolve_entities_false")
+def lxml_xmlparser_no_entities():  # resolve_entities=False
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser(resolve_entities=False)
+ return lxml.etree.fromstring(xml_content, parser=parser).text
+
+# Billion laughs and quadratic blowup (huge_tree)
+
+## Good (huge_tree=True but resolve_entities=False)
+
+@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+huge_tree+resolve_entities_false")
+def lxml_xmlparser_huge_tree_no_entities():  # huge_tree=True with resolve_entities=False
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
+ return lxml.etree.fromstring(xml_content, parser=parser).text
+
+## Bad
+@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+huge_tree")
+def lxml_xmlparser_huge_tree():  # huge_tree=True, resolve_entities not set
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser(huge_tree=True)
+ return lxml.etree.fromstring(xml_content, parser=parser).text
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py
new file mode 100644
index 00000000000..428a2d645a1
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_dom.py
@@ -0,0 +1,44 @@
+from flask import request, Flask
+from io import StringIO, BytesIO
+import xml.dom.minidom
+import xml.dom.pulldom
+import xml.sax
+
+app = Flask(__name__)
+
+# Parsing
+
+@app.route("/xml_minidom_parse")
+def xml_minidom_parse():  # request parameter wrapped in StringIO -> xml.dom.minidom.parse
+ xml_content = request.args['xml_content']
+
+ return xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNodes
+
+@app.route("/xml_minidom_parseString")
+def xml_minidom_parseString():  # request parameter -> xml.dom.minidom.parseString
+ xml_content = request.args['xml_content']
+
+ return xml.dom.minidom.parseString(xml_content).documentElement.childNodes
+
+@app.route("/xml_pulldom_parse")
+def xml_pulldom_parse():  # NOTE(review): indexing the pulldom result relies on DOMEventStream's deprecated sequence protocol - confirm
+ xml_content = request.args['xml_content']
+
+ return xml.dom.pulldom.parse(StringIO(xml_content))['START_DOCUMENT'][1].documentElement.childNodes
+
+@app.route("/xml_pulldom_parseString")
+def xml_pulldom_parseString():  # NOTE(review): indexing the pulldom result relies on DOMEventStream's deprecated sequence protocol - confirm
+ xml_content = request.args['xml_content']
+
+ return xml.dom.pulldom.parseString(xml_content)['START_DOCUMENT'][1].documentElement.childNodes
+
+# With parsers
+
+@app.route("/xml_minidom_parse_xml_sax_make_parser")
+def xml_minidom_parse_xml_sax_make_parser():  # SAX parser with external general entities on, handed to minidom.parse
+ xml_content = request.args['xml_content']
+
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)  # explicitly enables feature_external_ges
+ return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes
+
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py
new file mode 100644
index 00000000000..caa321c0926
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_etree.py
@@ -0,0 +1,67 @@
+from flask import request, Flask
+from io import StringIO, BytesIO
+import xml.etree
+import xml.etree.ElementTree
+import xml.sax
+import lxml.etree
+
+app = Flask(__name__)
+
+# xxe = ']>&xxe;'
+
+# Parsing
+
+@app.route("/xml_etree_fromstring")
+def xml_etree_fromstring():  # request parameter -> xml.etree.ElementTree.fromstring, no explicit parser
+ xml_content = request.args['xml_content']
+
+ return xml.etree.ElementTree.fromstring(xml_content).text
+
+@app.route("/xml_etree_fromstringlist")
+def xml_etree_fromstringlist():  # NOTE(review): passes the string itself, not a list of fragments - confirm intended
+ xml_content = request.args['xml_content']
+
+ return xml.etree.ElementTree.fromstringlist(xml_content).text
+
+@app.route("/xml_etree_XML")
+def xml_etree_XML():  # request parameter -> xml.etree.ElementTree.XML, no explicit parser
+ xml_content = request.args['xml_content']
+
+ return xml.etree.ElementTree.XML(xml_content).text
+
+@app.route("/xml_etree_parse")
+def xml_etree_parse():  # request parameter wrapped in StringIO -> xml.etree.ElementTree.parse
+ xml_content = request.args['xml_content']
+
+ return xml.etree.ElementTree.parse(StringIO(xml_content)).getroot().text
+
+# With parsers
+
+@app.route("/xml_etree_fromstring-xml_etree_XMLParser")
+def xml_parser_1():  # xml.etree XMLParser() with no options, passed to xml.etree fromstring
+ xml_content = request.args['xml_content']
+
+ parser = xml.etree.ElementTree.XMLParser()
+ return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
+
+@app.route("/xml_etree_fromstring-lxml_etree_XMLParser")
+def xml_parser_2():  # lxml XMLParser() with no options, passed to xml.etree fromstring
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.XMLParser()
+ return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
+
+@app.route("/xml_etree_fromstring-lxml_get_default_parser")
+def xml_parser_3():  # lxml default parser, passed to xml.etree fromstring
+ xml_content = request.args['xml_content']
+
+ parser = lxml.etree.get_default_parser()
+ return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
+
+@app.route("/xml_etree_fromstring-xml_sax_make_parser")
+def xml_parser_4():  # SAX parser with external general entities on, passed to xml.etree fromstring
+ xml_content = request.args['xml_content']
+
+ parser = xml.sax.make_parser()
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)  # explicitly enables feature_external_ges
+ return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
\ No newline at end of file
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py
index b48dee89abe..0d7bbcaee61 100644
--- a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_sax_make_parser.py
@@ -2,7 +2,7 @@ from flask import request, Flask
from io import StringIO
import xml.sax
-# xml_content = ']>&xxe;'
+# xxe = ']>&xxe;'
app = Flask(__name__)
@@ -74,12 +74,28 @@ def xml_makeparser_minidom_entitiesTrue():
parser.setFeature(xml.sax.handler.feature_external_ges, True)
return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes
-# Forward Type Tracker test
+# Forward Type Tracking test
+
+@app.route("/forward_tracking1/<action>")
+def forward_tracking1(action):  # `action` is bound from the URL rule; rules must start with "/"
+ xml_content = request.args['xml_content']
-def contrived_example(user_input, action):
parser = xml.sax.make_parser()
if action == 'load-config':
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.parse("/not-user-controlled/default_config.xml")
else:
- parser.parse(StringIO(user_input))
\ No newline at end of file
+ parser.parse(StringIO(xml_content))
+ return
+
+@app.route("/forward_tracking2/<action>")
+def forward_tracking2(action):  # `action` is bound from the URL rule; rules must start with "/"
+ xml_content = request.args['xml_content']
+
+ parser = xml.sax.make_parser()
+ if action == 'load-config':
+ parser.setFeature(xml.sax.handler.feature_external_ges, True)  # enabled only on the fixed-path branch
+ parser.parse("/not-user-controlled/default_config.xml")
+ else:
+ parser.parse(StringIO(xml_content))  # user-controlled input is parsed on this branch
+ return
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py
new file mode 100644
index 00000000000..2b91a22e1a2
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xml_to_dict.py
@@ -0,0 +1,17 @@
+from flask import request, Flask
+from io import StringIO, BytesIO
+import xmltodict
+
+app = Flask(__name__)
+
+@app.route("/xmltodict.parse")
+def xmltodict_parse():  # xmltodict.parse called without an explicit disable_entities argument
+ xml_content = request.args['xml_content']
+
+ return xmltodict.parse(xml_content)
+
+@app.route("/xmltodict.parse2")
+def xmltodict_parse2():  # xmltodict.parse called with disable_entities=False
+ xml_content = request.args['xml_content']
+
+ return xmltodict.parse(xml_content, disable_entities=False)
\ No newline at end of file
diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py
new file mode 100644
index 00000000000..baa433c4a8a
--- /dev/null
+++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py
@@ -0,0 +1,10 @@
+from xmlrpc.server import SimpleXMLRPCServer
+
+def foo(n):  # identity function exposed over XML-RPC
+ return n
+
+server = SimpleXMLRPCServer(("127.0.0.1", 8000))  # listens on 127.0.0.1, port 8000
+server.register_function(foo, "foo")  # exposes `foo` under the XML-RPC method name "foo"
+server.serve_forever()  # handles requests until interrupted
+
+# billion_laughs -> curl 127.0.0.1:8000 --data-raw ']>foo&lol9;'