This commit is contained in:
jorgectf
2022-02-08 17:23:18 +01:00
parent d2f07e4df2
commit 8f9cd16806
23 changed files with 784 additions and 421 deletions

View File

@@ -1,13 +0,0 @@
from flask import request, Flask
import lxml.etree
import xml.etree.ElementTree
@app.route("/example")
def example():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser()
parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser)
return parsed_xml.text

View File

@@ -1,22 +0,0 @@
/**
* @name XML External Entity abuse
* @description User input should not be parsed by XML parsers without security options enabled.
* @kind path-problem
* @problem.severity error
* @id py/xxe
* @tags security
* external/cwe/cwe-611
* external/cwe/cwe-776
* external/cwe/cwe-827
*/
// determine precision above
import python
import experimental.semmle.python.security.XXE
import DataFlow::PathGraph
from XXEFlowConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink.getNode(), source, sink,
"$@ XML input is constructed from a $@ and isn't secured against XML External Entities abuse",
sink.getNode(), "This", source.getNode(), "user-provided value"

View File

@@ -0,0 +1,4 @@
<?xml version="1.0"?>
<!DOCTYPE dt [
<!ENTITY xxe SYSTEM "file:///etc/passwd">]>
<test>&xxe;</test>

View File

@@ -0,0 +1,25 @@
from flask import request, Flask
import lxml.etree
import xml.etree.ElementTree
app = Flask(__name__)
# BAD: the parser is created without `resolve_entities=False`.
@app.route("/bad")
def bad():
    """Parse the `xml_content` query parameter with a default `lxml.etree.XMLParser`.

    Returns the text of the root element of the parsed document.
    """
    payload = request.args['xml_content']
    insecure_parser = lxml.etree.XMLParser()
    return xml.etree.ElementTree.fromstring(payload, parser=insecure_parser).text
# GOOD: the parser is created with `resolve_entities=False`.
@app.route("/good")
def good():
    """Parse the `xml_content` query parameter with entity resolution disabled.

    Returns the text of the root element of the parsed document.
    """
    payload = request.args['xml_content']
    hardened_parser = lxml.etree.XMLParser(resolve_entities=False)
    return xml.etree.ElementTree.fromstring(payload, parser=hardened_parser).text

View File

@@ -5,31 +5,18 @@
<overview>
<p>
Parsing untrusted XML files with a weakly configured XML parser may lead to an XML External Entity (XXE) attack.
Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE),
Billion Laughs, Quadratic Blowup and DTD retrieval.
This type of attack uses external entity references to access arbitrary files on a system, carry out denial of
service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band
data retrieval techniques may allow attackers to steal sensitive data. Denial of service can also be carried out
in this situation.
</p>
<p>
Refer to the following links to check the details regarding how and which libraries are vulnerable:
</p>
<ul>
<li><a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">Python 3</a>.</li>
<li><a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">Python 2</a>.</li>
</ul>
<p>
This query currently identifies vulnerable XML parsing from the following parsers:
<code>xml.etree.ElementTree.XMLParser</code>, <code>lxml.etree.XMLParser</code>, <code>lxml.etree.get_default_parser</code>,
<code>xml.sax.make_parser</code>.
</p>
</overview>
<recommendation>
<p>
Use <a href="https://docs.python.org/3/library/xml.html#the-defusedxml-package">defusedxml</a>, a Python package aimed
Use <a href="https://pypi.org/project/defusedxml/">defusedxml</a>, a Python package that aims
to prevent any potentially malicious operation.
</p>
</recommendation>
@@ -39,10 +26,17 @@ to prevent any potentially malicious operation.
The following example calls <code>xml.etree.ElementTree.fromstring</code> using a parser (<code>lxml.etree.XMLParser</code>)
that is not safely configured on untrusted data, and is therefore inherently unsafe.
</p>
<sample src="XXE.py"/>
<sample src="XmlInjection.py"/>
<p>
When an input (<code>xml_content</code>) like the following XML content is sent to <code>/bad</code>, the response
contains the contents of <code>/etc/passwd</code>.
</p>
<sample src="XXE.xml"/>
</example>
<references>
<li>Python 3 <a href="https://docs.python.org/3/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python 2 <a href="https://docs.python.org/2/library/xml.html#xml-vulnerabilities">XML Vulnerabilities</a>.</li>
<li>Python <a href="https://www.edureka.co/blog/python-xml-parser-tutorial/">XML Parsing</a>.</li>
<li>OWASP vulnerability description: <a href="https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing">XML External Entity (XXE) Processing</a>.</li>
<li>OWASP guidance on parsing xml files: <a href="https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html#python">XXE Prevention Cheat Sheet</a>.</li>

View File

@@ -0,0 +1,22 @@
/**
* @name XML injection
* @description User input should not be parsed without security options enabled.
* @kind path-problem
* @problem.severity error
* @id py/xml-injection
* @tags security
* external/cwe/cwe-611
* external/cwe/cwe-776
* external/cwe/cwe-827
*/
// determine precision above
import python
import experimental.semmle.python.security.dataflow.XmlInjection
import DataFlow::PathGraph
// Report each flow accepted by `XmlInjection::xmlInjectionVulnerable`; because `kind` is a
// selected variable, a flow is reported once for every vulnerability `kind` that holds for it.
from DataFlow::PathNode source, DataFlow::PathNode sink, string kind
where XmlInjection::xmlInjectionVulnerable(source, sink, kind)
select sink.getNode(), source, sink,
// The two `$@` placeholders are bound to the sink ("This") and to the source ("user-provided value").
"$@ XML input is constructed from a $@ and is vulnerable to " + kind + ".", sink.getNode(),
"This", source.getNode(), "user-provided value"

View File

@@ -44,6 +44,25 @@ class LogOutput extends DataFlow::Node {
DataFlow::Node getAnInput() { result = range.getAnInput() }
}
module XML {
/**
* A data-flow node that collects functions parsing XML.
*
* Extend this class to refine existing API models. If you want to model new APIs,
* extend `XMLParsing::Range` instead.
*/
class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
/**
* Gets the argument containing the content to parse.
*
* The result is delegated to the underlying `XMLParsing::Range`.
*/
DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
* Holds if the parsing method or the parser holding it is vulnerable to `kind`.
*
* The result is delegated to the underlying `XMLParsing::Range`.
*/
predicate vulnerable(string kind) { super.vulnerable(kind) }
}
/** Provides classes for modeling XML parsing APIs. */
module XMLParsing {
/**
@@ -59,34 +78,28 @@ module XMLParsing {
abstract DataFlow::Node getAnInput();
/**
* Holds if the parser may be parsing the input dangerously.
*
* Specifically, this predicate holds whether the XML parsing parses/extends external
* entities in the parsed XML stream.
* Holds if the parsing method or the parser holding it is vulnerable to `kind`.
*/
abstract predicate mayBeDangerous();
abstract predicate vulnerable(string kind);
}
}
/**
* A data-flow node that collects functions parsing XML.
* A data-flow node that collects XML parsers.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParsing` instead.
* extend `XMLParser` instead.
*/
class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range {
class XMLParser extends DataFlow::Node instanceof XMLParser::Range {
/**
* Gets the argument containing the content to parse.
*
* Specifically, this predicate holds whether the XML parsing parses/extends external
* entities in the parsed XML stream.
*/
DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
* Holds if the parser may be parsing the input dangerously.
* Holds if the parser is vulnerable to `kind`.
*/
predicate mayBeDangerous() { super.mayBeDangerous() }
predicate vulnerable(string kind) { super.vulnerable(kind) }
}
/** Provides classes for modeling XML parsers. */
@@ -104,34 +117,11 @@ module XMLParser {
abstract DataFlow::Node getAnInput();
/**
* Holds if the parser may be dangerously configured.
*
* Specifically, this predicate holds whether the XML parser parses/extends external
* entities in the parsed XML stream.
* Holds if the parser is vulnerable to `kind`.
*/
abstract predicate mayBeDangerous();
abstract predicate vulnerable(string kind);
}
}
/**
* A data-flow node that collects XML parsers.
*
* Extend this class to model new APIs. If you want to refine existing API models,
* extend `XMLParser` instead.
*/
class XMLParser extends DataFlow::Node instanceof XMLParser::Range {
/**
* Gets the argument containing the content to parse.
*/
DataFlow::Node getAnInput() { result = super.getAnInput() }
/**
* Holds if the parser may be dangerously configured.
*
* Specifically, this predicate holds whether the XML parser parses/extends external
* entities in the parsed XML stream.
*/
predicate mayBeDangerous() { super.mayBeDangerous() }
}
/** Provides classes for modeling LDAP query execution-related APIs. */

View File

@@ -3,7 +3,7 @@
*/
private import experimental.semmle.python.frameworks.Stdlib
private import experimental.semmle.python.frameworks.XML
private import experimental.semmle.python.frameworks.Xml
private import experimental.semmle.python.frameworks.Flask
private import experimental.semmle.python.frameworks.Django
private import experimental.semmle.python.frameworks.Werkzeug

View File

@@ -1,196 +0,0 @@
/**
* Provides class and predicates to track external data that
* may represent malicious XML objects.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module XML {
/** Gets a reference to `xml.etree.ElementTree`. */
private API::Node xmlEtree() {
result = API::moduleImport("xml").getMember("etree").getMember("ElementTree")
}
/** Gets a call to `xml.etree.ElementTree.XMLParser`. */
private class XMLEtreeParser extends DataFlow::CallCfgNode, XMLParser::Range {
XMLEtreeParser() { this = xmlEtree().getMember("XMLParser").getACall() }
override DataFlow::Node getAnInput() { none() }
override predicate mayBeDangerous() { any() }
}
/**
* Gets a call to `xml.etree.ElementTree.fromstring`, `xml.etree.ElementTree.fromstringlist`,
* `xml.etree.ElementTree.XML` or `xml.etree.ElementTree.parse`.
*
* Given the following example:
*
* ```py
* parser = lxml.etree.XMLParser()
* parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
* ```
*
* `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)`
* and `xml_content` would be the result of `getAnInput()`.
*/
private class XMLEtreeParsing extends DataFlow::CallCfgNode, XMLParsing::Range {
XMLEtreeParsing() {
this = xmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override predicate mayBeDangerous() {
exists(XMLParser xmlParser |
xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser
)
}
}
/** Gets a reference to `xml.sax`. */
private API::Node xmlSax() { result = API::moduleImport("xml").getMember("sax") }
/**
* Gets a call to `xml.sax.make_parser` and following calls.
*
* Given the following example:
*
* ```py
* BadHandler = MainHandler()
* parser = xml.sax.make_parser()
* parser.setContentHandler(BadHandler)
* parser.setFeature(xml.sax.handler.feature_external_ges, False)
* parser.parse(StringIO(xml_content))
* parsed_xml = BadHandler._result
* ```
*
* `this` would be `xml.sax.make_parser()`, `getAnInput()` would return `StringIO(xml_content)`
* and `mayBeDangerous()` would not hold since `xml.sax.handler.feature_external_ges` is set to
* `False` and so is not vulnerable.
* see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges
*/
private class XMLSaxParser extends DataFlow::CallCfgNode, XMLParser::Range {
DataFlow::CallCfgNode attrCall;
XMLSaxParser() {
this = xmlSax().getMember("make_parser").getACall() and
attrCall.getFunction().(DataFlow::AttrRead).getObject().getALocalSource() = this
}
override DataFlow::Node getAnInput() {
attrCall.getFunction().(DataFlow::AttrRead).getAttributeName() = "parse" and
result = attrCall.getArg(0)
}
override predicate mayBeDangerous() {
attrCall.getFunction().(DataFlow::AttrRead).getAttributeName() = "setFeature" and
attrCall.getArg(0) = xmlSax().getMember("handler").getMember("feature_external_ges").getAUse() and
DataFlow::localFlow(DataFlow::exprNode(any(True trueName)), attrCall.getArg(1))
}
}
/** Gets a reference to `lxml.etree`. */
private API::Node lxmlEtree() { result = API::moduleImport("lxml").getMember("etree") }
/**
* Gets a call to `lxml.etree.XMLParser` or `lxml.etree.get_default_parser` and `mayBeDangerous()`
* identifies whether the argument `no_network` is set to `False` or the arguments `huge_tree`
* or `resolve_entities` are set to True. Since `resolve_entities` default value is `True`,
* the predicate will also succeed if the argument is not set.
*/
private class LXMLParser extends DataFlow::CallCfgNode, XMLParser::Range {
LXMLParser() { this = lxmlEtree().getMember(["XMLParser", "get_default_parser"]).getACall() }
override DataFlow::Node getAnInput() { none() }
override predicate mayBeDangerous() {
DataFlow::localFlow(DataFlow::exprNode(any(False falseName)), this.getArgByName("no_network")) or
DataFlow::localFlow(DataFlow::exprNode(any(True trueName)),
this.getArgByName(["huge_tree", "resolve_entities"])) or
not exists(this.getArgByName("resolve_entities"))
}
}
/**
* Gets a call to `lxml.etree.fromstring`, `xml.etree.fromstringlist`,
* `xml.etree.XML` or `xml.etree.parse`.
*
* Given the following example:
*
* ```py
* parser = lxml.etree.XMLParser()
* parsed_xml = lxml.etree.fromstring(xml_content, parser=parser).text
* ```
*
* `this` would be `lxml.etree.fromstring(xml_content, parser=parser)`
* and `xml_content` would be the result of `getAnInput()`.
*/
private class LXMLParsing extends DataFlow::CallCfgNode, XMLParsing::Range {
LXMLParsing() {
this = lxmlEtree().getMember(["fromstring", "fromstringlist", "XML", "parse"]).getACall()
}
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override predicate mayBeDangerous() {
exists(XMLParser xmlParser |
xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser
)
or
not exists(this.getArgByName("parser"))
}
}
/** Gets a reference to the `xmltodict` module. */
private API::Node xmltodict() { result = API::moduleImport("xmltodict") }
/**
* Gets a call to `xmltodict.parse` and `mayBeDangerous()` identifies
* whether the argument `disable_entities` is set to `False`.
*/
private class XMLtoDictParsing extends DataFlow::CallCfgNode, XMLParsing::Range {
XMLtoDictParsing() { this = xmltodict().getMember("parse").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override predicate mayBeDangerous() {
DataFlow::localFlow(DataFlow::exprNode(any(False falseName)),
this.getArgByName("disable_entities"))
}
}
/** Gets a reference to `xml.dom.minidom` or `xml.dom.pulldom`. */
private API::Node xmlDom() {
result = API::moduleImport("xml").getMember("dom").getMember(["minidom", "pulldom"])
}
/**
* Gets a call to `xml.dom.minidom.parse` or `xml.dom.pulldom.parse`.
*
* Given the following example:
*
* ```py
* parser = xml.sax.make_parser()
* parser.setFeature(xml.sax.handler.feature_external_ges, True)
* parsed_xml = xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNod
* ```
*
* `this` would be `xml.dom.minidom.parse(StringIO(xml_content), parser=parser)`
* and `StringIO(xml_content)` would be the result of `getAnInput()`.
*/
private class XMLDomParsing extends DataFlow::CallCfgNode, XMLParsing::Range {
XMLDomParsing() { this = xmlDom().getMember("parse").getACall() }
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override predicate mayBeDangerous() {
exists(XMLParser xmlParser |
xmlParser.mayBeDangerous() and this.getArgByName("parser").getALocalSource() = xmlParser
)
}
}
}

View File

@@ -0,0 +1,301 @@
/**
* Provides class and predicates to track external data that
* may represent malicious XML objects.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.ApiGraphs
private module Xml {
/**
* A call to `xml.etree.ElementTree.XMLParser`, modeled as an XML parser.
*
* This model contributes no sink on its own: `getAnInput()` has no result and
* `vulnerable(kind)` holds for no `kind`.
*/
private class XMLEtreeParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
XMLEtreeParser() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember("XMLParser")
.getACall()
}
// The parser call itself receives no content to parse.
override DataFlow::Node getAnInput() { none() }
// No vulnerability kind is reported for this parser.
override predicate vulnerable(string kind) { none() }
}
/**
* A call to:
* * `xml.etree.ElementTree.fromstring`
* * `xml.etree.ElementTree.fromstringlist`
* * `xml.etree.ElementTree.XML`
* * `xml.etree.ElementTree.parse`
*
* Given the following example:
*
* ```py
* parser = lxml.etree.XMLParser()
* xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
* ```
*
* * `this` would be `xml.etree.ElementTree.fromstring(xml_content, parser=parser)`.
* * `getAnInput()`'s result would be `xml_content`.
* * `vulnerable(kind)`'s `kind` would be `XXE`.
*
* The call is only considered vulnerable through the parser passed as the `parser` keyword
* argument; a call without that keyword argument is not reported.
*/
private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLEtreeParsing() {
this =
API::moduleImport("xml")
.getMember("etree")
.getMember("ElementTree")
.getMember(["fromstring", "fromstringlist", "XML", "parse"])
.getACall()
}
// The content to parse is the first positional argument.
override DataFlow::Node getAnInput() { result = this.getArg(0) }
// Inherits the vulnerability kinds of the modeled parser that locally flows to `parser=`.
override predicate vulnerable(string kind) {
exists(XML::XMLParser xmlParser |
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
)
}
}
/**
* Gets a reference to `parser` after `feature` has been enabled on it, using type tracker `t`.
*
* Tracking starts at the receiver of a `parser.setFeature(feature, <value>)` call whose
* second argument has a `True` expression as local source; calls that pass any other value
* do not start tracking.
*/
private DataFlow::Node trackSaxFeature(
DataFlow::TypeTracker t, DataFlow::CallCfgNode parser, API::Node feature
) {
t.start() and
exists(DataFlow::MethodCallNode featureCall |
featureCall = parser.getAMethodCall("setFeature") and
featureCall.getArg(0).getALocalSource() = feature.getAUse() and
featureCall.getArg(1).getALocalSource() = DataFlow::exprNode(any(True t_)) and
result = featureCall.getObject()
)
or
// Follow the tracked reference one small step further.
exists(DataFlow::TypeTracker t2 |
t = t2.smallstep(trackSaxFeature(t2, parser, feature), result)
)
}
/**
* Gets a reference to `parser` after `feature` has been set to `True` on it
* through a `setFeature` call.
*/
DataFlow::Node trackSaxFeature(DataFlow::CallCfgNode parser, API::Node feature) {
result = trackSaxFeature(DataFlow::TypeTracker::end(), parser, feature)
}
/**
 * A call to `xml.sax.make_parser`.
 *
 * Given the following example:
 *
 * ```py
 * BadHandler = MainHandler()
 * parser = xml.sax.make_parser()
 * parser.setContentHandler(BadHandler)
 * parser.setFeature(xml.sax.handler.feature_external_ges, False)
 * parser.parse(StringIO(xml_content))
 * parsed_xml = BadHandler._result
 * ```
 *
 * * `this` would be `xml.sax.make_parser()`.
 * * `getAnInput()`'s result would be `StringIO(xml_content)`.
 * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
 *
 * `XXE` and `DTD retrieval` are additionally reported when
 * `xml.sax.handler.feature_external_ges` has been set to `True` on the parser
 * that `parse` is called on.
 */
private class XMLSaxParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
  XMLSaxParser() {
    this = API::moduleImport("xml").getMember("sax").getMember("make_parser").getACall()
  }

  /** Gets the first argument of a `parse` call made on this parser. */
  override DataFlow::Node getAnInput() { result = this.getAMethodCall("parse").getArg(0) }

  /**
   * Holds if this parser is vulnerable to `kind`.
   */
  override predicate vulnerable(string kind) {
    // Entity-expansion attacks do not depend on any feature being enabled. Previously this
    // alternative was nested in the `exists` below, so it only held after some feature had
    // been set to `True`, contradicting the example documented on this class.
    kind = ["Billion Laughs", "Quadratic Blowup"]
    or
    // External entities are only an issue once `feature_external_ges` has been turned on
    // for the parser instance that `parse` is invoked on.
    exists(DataFlow::MethodCallNode parse, API::Node handler, API::Node feature |
      handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
      feature = handler.getMember("feature_external_ges") and
      parse.calls(trackSaxFeature(this, feature), "parse") and
      parse.getArg(0) = this.getAnInput() // enough to avoid FPs?
    |
      kind = ["XXE", "DTD retrieval"]
    )
  }

  /**
   * Holds if `n` is reached by local flow from a reference to this parser on which
   * `feature_external_ges` has been set to `True`, in which case `kind` is `XXE` or
   * `DTD retrieval`.
   */
  predicate vulnerable(DataFlow::Node n, string kind) {
    exists(API::Node handler, API::Node feature |
      handler = API::moduleImport("xml").getMember("sax").getMember("handler") and
      DataFlow::exprNode(trackSaxFeature(this, feature).asExpr())
          .(DataFlow::LocalSourceNode)
          .flowsTo(n)
    |
      kind = ["XXE", "DTD retrieval"] and
      feature = handler.getMember("feature_external_ges")
    )
  }
}
/**
* A call to:
* * `lxml.etree.XMLParser`
* * `lxml.etree.get_default_parser`
*
* Given the following example:
*
* ```py
* lxml.etree.XMLParser()
* ```
*
* * `this` would be `lxml.etree.XMLParser()`.
* * `vulnerable(kind)`'s `kind` would be `XXE`, because `resolve_entities` is not set to `False`.
*
* `Billion Laughs` and `Quadratic Blowup` are additionally reported when `huge_tree=True` is
* passed and `resolve_entities` is not set to `False`.
*/
private class LXMLParser extends DataFlow::CallCfgNode, XML::XMLParser::Range {
LXMLParser() {
this =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["XMLParser", "get_default_parser"])
.getACall()
}
// The parser call itself receives no content to parse.
override DataFlow::Node getAnInput() { none() }
override predicate vulnerable(string kind) {
// Holds unless the `resolve_entities` keyword argument is itself a `False` expression;
// this includes calls that do not pass the argument at all.
kind = "XXE" and not this.getArgByName("resolve_entities").asExpr() = any(False f)
or
kind = ["Billion Laughs", "Quadratic Blowup"] and
(
this.getArgByName("huge_tree").asExpr() = any(True t) and
not this.getArgByName("resolve_entities").asExpr() = any(False f)
)
}
}
/**
* A call to:
* * `lxml.etree.fromstring`
* * `lxml.etree.fromstringlist`
* * `lxml.etree.XML`
* * `lxml.etree.parse`
*
* Given the following example:
*
* ```py
* parser = lxml.etree.XMLParser()
* lxml.etree.fromstring(xml_content, parser=parser).text
* ```
*
* * `this` would be `lxml.etree.fromstring(xml_content, parser=parser)`.
* * `getAnInput()`'s result would be `xml_content`.
* * `vulnerable(kind)`'s `kind` would be `XXE`.
*
* A call without a `parser` keyword argument is also reported as vulnerable to `XXE`.
*/
private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
LXMLParsing() {
this =
API::moduleImport("lxml")
.getMember("etree")
.getMember(["fromstring", "fromstringlist", "XML", "parse"])
.getACall()
}
// The content to parse is the first positional argument.
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override predicate vulnerable(string kind) {
// Inherit the vulnerability kinds of the modeled parser that locally flows to `parser=`.
exists(XML::XMLParser xmlParser |
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
)
or
// No `parser` keyword argument at all.
kind = "XXE" and not exists(this.getArgByName("parser"))
}
}
/**
 * A call to `xmltodict.parse`.
 *
 * Given the following example:
 *
 * ```py
 * xmltodict.parse(xml_content, disable_entities=False)
 * ```
 *
 * * `this` would be `xmltodict.parse(xml_content, disable_entities=False)`.
 * * `getAnInput()`'s result would be `xml_content`.
 * * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
 *
 * A call that does not pass `disable_entities=False` is not reported.
 */
private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
  XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() }

  /** Gets the first positional argument, which holds the content to parse. */
  override DataFlow::Node getAnInput() { result = this.getArg(0) }

  /** Holds if entity processing has been explicitly re-enabled for this call. */
  override predicate vulnerable(string kind) {
    kind = ["Billion Laughs", "Quadratic Blowup"] and
    // `disable_entities` is a keyword argument of the `parse` call, not a method invoked on
    // its result, so it must be looked up with `getArgByName` (as done for the lxml parser
    // options in this module) rather than `getAMethodCall`.
    this.getArgByName("disable_entities").asExpr() = any(False f)
  }
}
/**
* A call to:
* * `xml.dom.minidom.parse`
* * `xml.dom.minidom.parseString`
* * `xml.dom.pulldom.parse`
* * `xml.dom.pulldom.parseString`
*
* Given the following example:
*
* ```py
* xml.dom.minidom.parse(StringIO(xml_content)).documentElement.childNode
* ```
*
* * `this` would be `xml.dom.minidom.parse(StringIO(xml_content))`.
* * `getAnInput()`'s result would be `StringIO(xml_content)`.
* * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`,
*   because no `parser` keyword argument is passed.
*/
private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range {
XMLDomParsing() {
this =
API::moduleImport("xml")
.getMember("dom")
.getMember(["minidom", "pulldom"])
.getMember(["parse", "parseString"])
.getACall()
}
// The content to parse is the first positional argument.
override DataFlow::Node getAnInput() { result = this.getArg(0) }
override predicate vulnerable(string kind) {
// Inherit the vulnerability kinds of the modeled parser that locally flows to `parser=`.
exists(XML::XMLParser xmlParser |
xmlParser = this.getArgByName("parser").getALocalSource() and xmlParser.vulnerable(kind)
)
or
// No `parser` keyword argument at all.
kind = ["Billion Laughs", "Quadratic Blowup"] and not exists(this.getArgByName("parser"))
}
}
/**
* A call to `xmlrpc.server.SimpleXMLRPCServer`.
*
* Given the following example:
*
* ```py
* server = SimpleXMLRPCServer(("127.0.0.1", 8000))
* server.register_function(foo, "foo")
* server.serve_forever()
* ```
*
* * `this` would be `SimpleXMLRPCServer(("127.0.0.1", 8000))`.
* * `getAnInput()`'s result would be `foo`.
* * `vulnerable(kind)`'s `kind` would be `Billion Laughs` and `Quadratic Blowup`.
*/
private class XMLRPCServer extends DataFlow::CallCfgNode, XML::XMLParser::Range {
XMLRPCServer() {
this =
API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall()
}
// The first argument of every `register_function` call made on the server.
override DataFlow::Node getAnInput() {
result = this.getAMethodCall("register_function").getArg(0)
}
// Holds for every modeled server call; no argument of the call is inspected.
override predicate vulnerable(string kind) { kind = ["Billion Laughs", "Quadratic Blowup"] }
}
}

View File

@@ -1,37 +0,0 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
import semmle.python.ApiGraphs
/**
* A taint-tracking configuration for detecting XML External entities abuse.
*
* This configuration uses `RemoteFlowSource` as a source because there's no
* risk at parsing not user-supplied input without security options enabled.
*/
class XXEFlowConfig extends TaintTracking::Configuration {
XXEFlowConfig() { this = "XXEFlowConfig" }
override predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
override predicate isSink(DataFlow::Node sink) {
exists(XMLParsing xmlParsing | xmlParsing.mayBeDangerous() and sink = xmlParsing.getAnInput())
or
exists(XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.mayBeDangerous())
}
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof StringConstCompare
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::CallCfgNode ioCalls |
ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
nodeFrom = ioCalls.getArg(0) and
nodeTo = ioCalls
)
}
}

View File

@@ -0,0 +1,44 @@
import python
import experimental.semmle.python.Concepts
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import semmle.python.dataflow.new.BarrierGuards
module XmlInjection {
import XmlInjectionCustomizations::XmlInjection
/**
* A taint-tracking configuration for detecting "xml injection" vulnerabilities.
*
* Sources are `RemoteFlowSourceAsSource` nodes, sinks are `Sink` nodes, `SanitizerGuard`
* instances act as sanitizer guards, and `ioAdditionalTaintStep` adds extra taint steps.
*/
class XMLInjectionConfiguration extends TaintTracking::Configuration {
XMLInjectionConfiguration() { this = "XMLInjectionConfiguration" }
override predicate isSource(DataFlow::Node source) {
source instanceof RemoteFlowSourceAsSource
}
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
guard instanceof SanitizerGuard
}
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
ioAdditionalTaintStep(nodeFrom, nodeTo)
}
}
private import DataFlow::PathGraph
/** Holds if there is an XML injection from `source` to `sink` */
predicate xmlInjection(DataFlow::PathNode source, DataFlow::PathNode sink) {
any(XMLInjectionConfiguration xmlInjectionConfig).hasFlowPath(source, sink)
}
/**
* Holds if there is an XML injection from `source` to `sink` vulnerable to `kind`.
*
* `kind` comes from the XML parsing call or the XML parser whose input `sink` is; a flow
* whose sink is not vulnerable to any `kind` does not satisfy this predicate.
*/
predicate xmlInjectionVulnerable(DataFlow::PathNode source, DataFlow::PathNode sink, string kind) {
xmlInjection(source, sink) and
(
xmlParsingInputAsVulnerableSink(sink.getNode(), kind) or
xmlParserInputAsVulnerableSink(sink.getNode(), kind)
)
}
}

View File

@@ -0,0 +1,95 @@
/**
* Provides default sources, sinks and sanitizers for detecting
* "xml injection"
* vulnerabilities, as well as extension points for adding your own.
*/
private import python
private import semmle.python.dataflow.new.DataFlow
private import experimental.semmle.python.Concepts
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.dataflow.new.BarrierGuards
private import semmle.python.ApiGraphs
/**
* Provides default sources, sinks and sanitizers for detecting "xml injection"
* vulnerabilities, as well as extension points for adding your own.
*/
module XmlInjection {
/**
* A data flow source for "xml injection" vulnerabilities.
*/
abstract class Source extends DataFlow::Node { }
/**
* A data flow sink for "xml injection" vulnerabilities.
*/
abstract class Sink extends DataFlow::Node { }
/**
* A sanitizer guard for "xml injection" vulnerabilities.
*/
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
/**
* A data flow sink for XML parsing libraries.
*
* See `XML::XMLParsing`.
*/
abstract class XMLParsingSink extends Sink { }
/**
* A data flow sink for XML parsers.
*
* See `XML::XMLParser`
*/
abstract class XMLParserSink extends Sink { }
/**
* A source of remote user input, considered as a flow source.
*/
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
/**
* An xml parsing operation, considered as a flow sink.
*/
class XMLParsingInputAsSink extends XMLParsingSink {
XMLParsingInputAsSink() { this = any(XML::XMLParsing xmlParsing).getAnInput() }
}
/**
* An xml parsing operation vulnerable to `kind`.
*/
predicate xmlParsingInputAsVulnerableSink(DataFlow::Node sink, string kind) {
exists(XML::XMLParsing xmlParsing |
sink = xmlParsing.getAnInput() and xmlParsing.vulnerable(kind)
)
}
/**
* An xml parser operation, considered as a flow sink.
*/
class XMLParserInputAsSink extends XMLParserSink {
XMLParserInputAsSink() { this = any(XML::XMLParser xmlParser).getAnInput() }
}
/**
* An xml parser operation vulnerable to `kind`.
*/
predicate xmlParserInputAsVulnerableSink(DataFlow::Node sink, string kind) {
exists(XML::XMLParser xmlParser | sink = xmlParser.getAnInput() and xmlParser.vulnerable(kind))
}
/**
* A comparison with a constant string, considered as a sanitizer-guard.
*/
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
/**
* Holds if `nodeTo` is a call to `io.StringIO` or `io.BytesIO` and `nodeFrom` is the
* first positional argument of that call.
*/
predicate ioAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
exists(DataFlow::CallCfgNode ioCalls |
ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and
nodeFrom = ioCalls.getArg(0) and
nodeTo = ioCalls
)
}
}

View File

@@ -1 +0,0 @@
experimental/Security/CWE-611/XXE.ql

View File

@@ -0,0 +1 @@
experimental/Security/CWE-611/XmlInjection.ql

View File

@@ -1,73 +0,0 @@
from flask import request, Flask
from io import StringIO, BytesIO
import xml.etree
import xml.etree.ElementTree
import lxml.etree
import xml.dom.minidom
import xml.dom.pulldom
import xmltodict
app = Flask(__name__)
# xml_content = '<?xml version="1.0"?><!DOCTYPE dt [<!ENTITY xxe SYSTEM "file:///etc/passwd">]><test>&xxe;</test>'
@app.route("/lxml.etree.fromstring")
def lxml_fromstring():
xml_content = request.args['xml_content']
return lxml.etree.fromstring(xml_content).text
@app.route("/lxml.etree.XML")
def lxml_XML():
xml_content = request.args['xml_content']
return lxml.etree.XML(xml_content).text
@app.route("/lxml.etree.parse")
def lxml_parse():
xml_content = request.args['xml_content']
return lxml.etree.parse(StringIO(xml_content)).text
@app.route("/xmltodict.parse")
def xmltodict_parse():
xml_content = request.args['xml_content']
return xmltodict.parse(xml_content, disable_entities=False)
@app.route("/lxml.etree.XMLParser+lxml.etree.fromstring")
def lxml_XMLParser_fromstring():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser()
return lxml.etree.fromstring(xml_content, parser=parser).text
@app.route("/lxml.etree.get_default_parser+lxml.etree.fromstring")
def lxml_defaultParser_fromstring():
xml_content = request.args['xml_content']
parser = lxml.etree.get_default_parser()
return lxml.etree.fromstring(xml_content, parser=parser).text
@app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.fromstring")
def lxml_XMLParser_xml_fromstring():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser()
return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text
@app.route("/lxml.etree.XMLParser+xml.etree.ElementTree.parse")
def lxml_XMLParser_xml_parse():
xml_content = request.args['xml_content']
parser = lxml.etree.XMLParser()
return xml.etree.ElementTree.parse(StringIO(xml_content), parser=parser).getroot().text

View File

@@ -0,0 +1,76 @@
from flask import request, Flask
from io import StringIO, BytesIO
import lxml.etree
app = Flask(__name__)
# Parsing
@app.route("/lxml_etree_fromstring")
def lxml_etree_fromstring():
    """Pass the `xml_content` query parameter to `lxml.etree.fromstring` and return the root text."""
    document_root = lxml.etree.fromstring(request.args['xml_content'])
    return document_root.text
@app.route("/lxml_etree_fromstringlist")
def lxml_etree_fromstringlist():
    """Feed ``xml_content`` to ``lxml.etree.fromstringlist`` as a one-item list.

    Returns the ``text`` of the parsed root; no parser argument is given.
    """
    payload = request.args['xml_content']
    fragments = [payload]
    root = lxml.etree.fromstringlist(fragments)
    return root.text
@app.route("/lxml_etree_XML")
def lxml_etree_XML():
    """Feed the ``xml_content`` request argument to ``lxml.etree.XML``.

    Returns the ``text`` of the parsed root; no parser argument is given.
    """
    payload = request.args['xml_content']
    root = lxml.etree.XML(payload)
    return root.text
@app.route("/lxml_etree_parse")
def lxml_etree_parse():
    """Feed ``xml_content`` (wrapped in ``StringIO``) to ``lxml.etree.parse``.

    Returns the ``text`` of the resulting tree's root element.
    """
    payload = request.args['xml_content']
    source = StringIO(payload)
    tree = lxml.etree.parse(source)
    root = tree.getroot()
    return root.text
# With parsers - Default
# Every handler in this group needs its own function name: Flask uses the
# function name as the endpoint and refuses to register a second, different
# view function under an endpoint that already exists. Each handler also gets
# its own URL rule so that none of them is shadowed by an earlier one.

@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser")
def lxml_parser():
    """Parse ``xml_content`` with an ``XMLParser`` built with no arguments.

    Returns the ``text`` of the parsed root.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser()
    return lxml.etree.fromstring(xml_content, parser=parser).text


@app.route("/lxml_etree_fromstring-lxml.etree.get_default_parser")
def lxml_parser_get_default_parser():
    """Parse ``xml_content`` with the parser from ``get_default_parser()``.

    Returns the ``text`` of the parsed root.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.get_default_parser()
    return lxml.etree.fromstring(xml_content, parser=parser).text


# With parsers - With options

# XXE-safe
@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+resolve_entities_False")
def lxml_parser_no_entities():
    """Parse ``xml_content`` with ``XMLParser(resolve_entities=False)``.

    Returns the ``text`` of the parsed root.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser(resolve_entities=False)
    return lxml.etree.fromstring(xml_content, parser=parser).text


# Billion laughs and quadratic blowup (huge_tree)

## Good (huge_tree=True but resolve_entities=False)
@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+resolve_entities_False+huge_tree")
def lxml_parser_huge_tree_no_entities():
    """Parse ``xml_content`` with ``resolve_entities=False, huge_tree=True``.

    Returns the ``text`` of the parsed root.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
    return lxml.etree.fromstring(xml_content, parser=parser).text


## Bad
@app.route("/lxml_etree_fromstring-lxml.etree.XMLParser+huge_tree")
def lxml_parser_huge_tree():
    """Parse ``xml_content`` with ``XMLParser(huge_tree=True)`` only.

    Returns the ``text`` of the parsed root.
    """
    xml_content = request.args['xml_content']
    parser = lxml.etree.XMLParser(huge_tree=True)
    return lxml.etree.fromstring(xml_content, parser=parser).text

View File

@@ -0,0 +1,44 @@
from flask import request, Flask
from io import StringIO, BytesIO
import xml.dom.minidom
import xml.dom.pulldom
import xml.sax
app = Flask(__name__)
# Parsing
@app.route("/xml_minidom_parse")
def xml_minidom_parse():
    """Parse ``xml_content`` (wrapped in ``StringIO``) with ``xml.dom.minidom.parse``.

    Returns the ``childNodes`` of the document element.
    """
    payload = request.args['xml_content']
    document = xml.dom.minidom.parse(StringIO(payload))
    return document.documentElement.childNodes
@app.route("/xml_minidom_parseString")
def xml_minidom_parseString():
    """Parse the ``xml_content`` string with ``xml.dom.minidom.parseString``.

    Returns the ``childNodes`` of the document element.
    """
    payload = request.args['xml_content']
    document = xml.dom.minidom.parseString(payload)
    return document.documentElement.childNodes
@app.route("/xml_pulldom_parse")
def xml_pulldom_parse():
    """Parse ``xml_content`` (wrapped in ``StringIO``) with ``xml.dom.pulldom.parse``.

    The returned stream is indexed with ``'START_DOCUMENT'``; element 1 of
    that result is used as the node whose ``documentElement.childNodes`` is
    returned.
    """
    payload = request.args['xml_content']
    stream = xml.dom.pulldom.parse(StringIO(payload))
    event = stream['START_DOCUMENT']
    node = event[1]
    return node.documentElement.childNodes
@app.route("/xml_pulldom_parseString")
def xml_pulldom_parseString():
    """Parse the ``xml_content`` string with ``xml.dom.pulldom.parseString``.

    The returned stream is indexed with ``'START_DOCUMENT'``; element 1 of
    that result is used as the node whose ``documentElement.childNodes`` is
    returned.
    """
    payload = request.args['xml_content']
    stream = xml.dom.pulldom.parseString(payload)
    event = stream['START_DOCUMENT']
    node = event[1]
    return node.documentElement.childNodes
# With parsers
@app.route("/xml_minidom_parse_xml_sax_make_parser")
def xml_minidom_parse_xml_sax_make_parser():
    """Parse ``xml_content`` with ``xml.dom.minidom.parse`` and a SAX parser.

    The parser comes from ``xml.sax.make_parser()`` and has
    ``feature_external_ges`` switched on before use. Returns the
    ``childNodes`` of the document element.
    """
    payload = request.args['xml_content']
    sax_parser = xml.sax.make_parser()
    sax_parser.setFeature(xml.sax.handler.feature_external_ges, True)
    document = xml.dom.minidom.parse(StringIO(payload), parser=sax_parser)
    return document.documentElement.childNodes

View File

@@ -0,0 +1,66 @@
from flask import request, Flask
from io import StringIO, BytesIO
import xml.etree
import xml.etree.ElementTree
import lxml.etree
app = Flask(__name__)
# xxe = '<?xml version="1.0"?><!DOCTYPE dt [<!ENTITY xxe SYSTEM "file:///etc/passwd">]><test>&xxe;</test>'
# Parsing
@app.route("/xml_etree_fromstring")
def xml_etree_fromstring():
    """Parse the ``xml_content`` request argument with ``ElementTree.fromstring``.

    Returns the ``text`` of the parsed root; no parser argument is given.
    """
    payload = request.args['xml_content']
    root = xml.etree.ElementTree.fromstring(payload)
    return root.text
@app.route("/xml_etree_fromstringlist")
def xml_etree_fromstringlist():
    """Parse ``xml_content`` with ``ElementTree.fromstringlist``.

    Returns the ``text`` of the parsed root; no parser argument is given.
    """
    xml_content = request.args['xml_content']
    # fromstringlist() takes a sequence of string fragments. A bare str would
    # be iterated one character at a time, so wrap the document in a list.
    return xml.etree.ElementTree.fromstringlist([xml_content]).text
@app.route("/xml_etree_XML")
def xml_etree_XML():
    """Parse the ``xml_content`` request argument with ``ElementTree.XML``.

    Returns the ``text`` of the parsed root; no parser argument is given.
    """
    payload = request.args['xml_content']
    root = xml.etree.ElementTree.XML(payload)
    return root.text
@app.route("/xml_etree_parse")
def xml_etree_parse():
    """Parse ``xml_content`` (wrapped in ``StringIO``) with ``ElementTree.parse``.

    Returns the ``text`` of the resulting tree's root element.
    """
    payload = request.args['xml_content']
    source = StringIO(payload)
    tree = xml.etree.ElementTree.parse(source)
    root = tree.getroot()
    return root.text
# With parsers
@app.route("/xml_etree_fromstring-xml_etree_XMLParser")
def xml_parser_1():
    """Parse ``xml_content`` with ``ElementTree.fromstring`` and a stdlib parser.

    The parser is an ``xml.etree.ElementTree.XMLParser`` built with no
    arguments. Returns the ``text`` of the parsed root.
    """
    payload = request.args['xml_content']
    stdlib_parser = xml.etree.ElementTree.XMLParser()
    root = xml.etree.ElementTree.fromstring(payload, parser=stdlib_parser)
    return root.text
@app.route("/xml_etree_fromstring-lxml_etree_XMLParser")
def xml_parser_2():
    """Parse ``xml_content`` with ``ElementTree.fromstring`` and an lxml parser.

    The parser is an ``lxml.etree.XMLParser`` built with no arguments.
    Returns the ``text`` of the parsed result.
    """
    payload = request.args['xml_content']
    lxml_side_parser = lxml.etree.XMLParser()
    root = xml.etree.ElementTree.fromstring(payload, parser=lxml_side_parser)
    return root.text
@app.route("/xml_etree_fromstring-lxml_get_default_parser")
def xml_parser_3():
    """Parse ``xml_content`` with ``ElementTree.fromstring`` and lxml's default parser.

    The parser is obtained from ``lxml.etree.get_default_parser()``.
    Returns the ``text`` of the parsed result.
    """
    payload = request.args['xml_content']
    lxml_side_parser = lxml.etree.get_default_parser()
    root = xml.etree.ElementTree.fromstring(payload, parser=lxml_side_parser)
    return root.text
@app.route("/xml_etree_fromstring-xml_sax_make_parser")
def xml_parser_4():
    """Parse ``xml_content`` with ``ElementTree.fromstring`` and a SAX parser.

    The parser comes from ``xml.sax.make_parser()`` and has
    ``feature_external_ges`` switched on before it is handed to
    ``xml.etree.ElementTree.fromstring``.

    The route is distinct from ``xml_parser_3``'s, which the original rule
    duplicated.
    """
    # This file's import block does not import xml.sax, so import it here.
    import xml.sax

    xml_content = request.args['xml_content']
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_external_ges, True)
    # NOTE(review): a SAX parser's close() appears to return None rather than
    # an element, so ``.text`` may raise at runtime - confirm whether this
    # handler is only meant as a static-analysis target.
    return xml.etree.ElementTree.fromstring(xml_content, parser=parser).text

View File

@@ -2,7 +2,7 @@ from flask import request, Flask
from io import StringIO
import xml.sax
# xxe = '<?xml version="1.0"?><!DOCTYPE dt [<!ENTITY xxe SYSTEM "file:///etc/passwd">]><test>&xxe;</test>'
app = Flask(__name__)
@@ -74,12 +74,28 @@ def xml_makeparser_minidom_entitiesTrue():
parser.setFeature(xml.sax.handler.feature_external_ges, True)
return xml.dom.minidom.parse(StringIO(xml_content), parser=parser).documentElement.childNodes
# Forward Type Tracking test
@app.route("/forward_tracking1/<action>")
def forward_tracking1(action):
    """Parse either a fixed config file or ``xml_content`` with one SAX parser.

    Args:
        action: taken from the URL. When it equals ``'load-config'`` the
            parser has ``feature_external_ges`` turned off and parses a
            fixed path; otherwise the parser, with no feature set, parses
            the ``xml_content`` request argument.

    Returns:
        None.
    """
    # The rule needs a leading slash, and it must carry <action> so Flask can
    # supply the view's only parameter.
    xml_content = request.args['xml_content']
    parser = xml.sax.make_parser()
    if action == 'load-config':
        parser.setFeature(xml.sax.handler.feature_external_ges, False)
        parser.parse("/not-user-controlled/default_config.xml")
    else:
        parser.parse(StringIO(xml_content))
    return
@app.route("/forward_tracking2/<action>")
def forward_tracking2(action):
    """Parse either a fixed config file or ``xml_content`` with one SAX parser.

    Args:
        action: taken from the URL. When it equals ``'load-config'`` the
            parser has ``feature_external_ges`` turned on and parses a
            fixed path; otherwise the parser, with no feature set, parses
            the ``xml_content`` request argument.

    Returns:
        None.
    """
    # The rule needs a leading slash, and it must carry <action> so Flask can
    # supply the view's only parameter.
    xml_content = request.args['xml_content']
    parser = xml.sax.make_parser()
    if action == 'load-config':
        parser.setFeature(xml.sax.handler.feature_external_ges, True)
        parser.parse("/not-user-controlled/default_config.xml")
    else:
        parser.parse(StringIO(xml_content))
    return

View File

@@ -0,0 +1,17 @@
from flask import request, Flask
from io import StringIO, BytesIO
import xmltodict
app = Flask(__name__)
@app.route("/xmltodict.parse")
def xmltodict_parse():
    """Parse the ``xml_content`` request argument with ``xmltodict.parse``.

    No options are passed. Returns the parse result.
    """
    payload = request.args['xml_content']
    parsed = xmltodict.parse(payload)
    return parsed
@app.route("/xmltodict.parse2")
def xmltodict_parse2():
    """Parse the ``xml_content`` request argument with ``xmltodict.parse``.

    ``disable_entities=False`` is passed explicitly. Returns the parse result.
    """
    payload = request.args['xml_content']
    parsed = xmltodict.parse(payload, disable_entities=False)
    return parsed

View File

@@ -0,0 +1,10 @@
from xmlrpc.server import SimpleXMLRPCServer
def foo(n):
    """Return ``n`` unchanged."""
    echoed = n
    return echoed
# Create an XML-RPC server for the address 127.0.0.1, port 8000.
server = SimpleXMLRPCServer(("127.0.0.1", 8000))
# Expose foo to clients under the method name "foo".
server.register_function(foo, "foo")
# Start handling requests.
server.serve_forever()
# billion_laughs -> curl 127.0.0.1:8000 --data-raw '<?xml version="1.0"?><!DOCTYPE lolz [<!ENTITY lol "lol"><!ELEMENT lolz (#PCDATA)><!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;"><!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;"><!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;"><!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;"><!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;"><!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;"><!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;"><!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;"><!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">]><methodCall><methodName>foo</methodName><params><param><value>&lol9;</value></param></params></methodCall>'